rename frontend to web and update contributing guide (#2509)
* rename frontend to web and update contributing guide
* rename this time
* fix build
* restructure rust code
* update documentation
* update descriptions
* Update CONTRIBUTING.md

Co-authored-by: J H <2364004+Blu-J@users.noreply.github.com>

---------

Co-authored-by: Aiden McClelland <me@drbonez.dev>
Co-authored-by: Aiden McClelland <3732071+dr-bonez@users.noreply.github.com>
Co-authored-by: J H <2364004+Blu-J@users.noreply.github.com>
56
core/startos/src/manager/health.rs
Normal file
@@ -0,0 +1,56 @@
use models::OptionExt;
use tracing::instrument;

use crate::context::RpcContext;
use crate::prelude::*;
use crate::s9pk::manifest::PackageId;
use crate::status::MainStatus;
use crate::Error;

/// Runs one health check cycle for a service: reads the package from the db, runs its health checks, and stores the results back in the db.
#[instrument(skip_all)]
pub async fn check(ctx: &RpcContext, id: &PackageId) -> Result<(), Error> {
    let (manifest, started) = {
        let peeked = ctx.db.peek().await;
        let pde = peeked
            .as_package_data()
            .as_idx(id)
            .or_not_found(id)?
            .expect_as_installed()?;

        let manifest = pde.as_installed().as_manifest().de()?;

        let started = pde.as_installed().as_status().as_main().de()?.started();

        (manifest, started)
    };

    let health_results = if let Some(started) = started {
        tracing::debug!("Checking health of {}", id);
        manifest
            .health_checks
            .check_all(ctx, started, id, &manifest.version, &manifest.volumes)
            .await?
    } else {
        return Ok(());
    };

    ctx.db
        .mutate(|v| {
            let pde = v
                .as_package_data_mut()
                .as_idx_mut(id)
                .or_not_found(id)?
                .expect_as_installed_mut()?;
            let status = pde.as_installed_mut().as_status_mut().as_main_mut();

            if let MainStatus::Running { health: _, started } = status.de()? {
                status.ser(&MainStatus::Running {
                    health: health_results.clone(),
                    started,
                })?;
            }
            Ok(())
        })
        .await
}
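Usage sketch (not part of the diff): the check cycle above is meant to be driven on a timer; this mirrors main_health_check_daemon in mod.rs further down, assuming an RpcContext and an installed PackageId are in hand.

// Minimal sketch of the polling loop that drives health::check.
async fn health_poll_loop(ctx: RpcContext, id: PackageId) {
    loop {
        if let Err(e) = health::check(&ctx, &id).await {
            // a failed cycle is logged but does not stop the daemon
            tracing::error!("health check for {} failed: {}", id, e);
        }
        tokio::time::sleep(Duration::from_secs(HEALTH_CHECK_COOLDOWN_SECONDS)).await;
    }
}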
300
core/startos/src/manager/manager_container.rs
Normal file
@@ -0,0 +1,300 @@
use std::sync::Arc;
use std::time::Duration;

use models::OptionExt;
use tokio::sync::watch;
use tokio::sync::watch::Sender;
use tracing::instrument;

use super::start_stop::StartStop;
use super::{manager_seed, run_main, ManagerPersistentContainer, RunMainResult};
use crate::prelude::*;
use crate::procedure::NoOutput;
use crate::s9pk::manifest::Manifest;
use crate::status::MainStatus;
use crate::util::NonDetachingJoinHandle;
use crate::Error;

pub type ManageContainerOverride = Arc<watch::Sender<Option<Override>>>;

pub type Override = MainStatus;

pub struct OverrideGuard {
    override_main_status: Option<ManageContainerOverride>,
}
impl OverrideGuard {
    pub fn drop(self) {}
}
impl Drop for OverrideGuard {
    fn drop(&mut self) {
        if let Some(override_main_status) = self.override_main_status.take() {
            override_main_status.send_modify(|x| {
                *x = None;
            });
        }
    }
}

/// The state machine actor for a service, tracking its current and desired
/// run states.
pub struct ManageContainer {
    pub(super) current_state: Arc<watch::Sender<StartStop>>,
    pub(super) desired_state: Arc<watch::Sender<StartStop>>,
    _service: NonDetachingJoinHandle<()>,
    _save_state: NonDetachingJoinHandle<()>,
    override_main_status: ManageContainerOverride,
}

impl ManageContainer {
    pub async fn new(
        seed: Arc<manager_seed::ManagerSeed>,
        persistent_container: ManagerPersistentContainer,
    ) -> Result<Self, Error> {
        let current_state = Arc::new(watch::channel(StartStop::Stop).0);
        let desired_state = Arc::new(
            watch::channel::<StartStop>(
                get_status(seed.ctx.db.peek().await, &seed.manifest).into(),
            )
            .0,
        );
        let override_main_status: ManageContainerOverride = Arc::new(watch::channel(None).0);
        let service = tokio::spawn(create_service_manager(
            desired_state.clone(),
            seed.clone(),
            current_state.clone(),
            persistent_container,
        ))
        .into();
        let save_state = tokio::spawn(save_state(
            desired_state.clone(),
            current_state.clone(),
            override_main_status.clone(),
            seed.clone(),
        ))
        .into();
        Ok(ManageContainer {
            current_state,
            desired_state,
            _service: service,
            override_main_status,
            _save_state: save_state,
        })
    }

    /// Sets an override, used during operations like a service restart, when we want the
    /// reported status to differ from the actual status of the service.
    pub fn set_override(&self, override_status: Override) -> Result<OverrideGuard, Error> {
        let status = Some(override_status);
        if self.override_main_status.borrow().is_some() {
            return Err(Error::new(
                eyre!("Already have an override"),
                ErrorKind::InvalidRequest,
            ));
        }
        self.override_main_status
            .send_modify(|x| *x = status.clone());
        Ok(OverrideGuard {
            override_main_status: Some(self.override_main_status.clone()),
        })
    }

    /// Sets the override without a guard to revert it. Used only by the manager during shutdown.
    pub(super) async fn lock_state_forever(
        &self,
        seed: &manager_seed::ManagerSeed,
    ) -> Result<(), Error> {
        let current_state = get_status(seed.ctx.db.peek().await, &seed.manifest);
        self.override_main_status
            .send_modify(|x| *x = Some(current_state));
        Ok(())
    }

    /// Sets the desired state of the service (start or stop).
    pub fn to_desired(&self, new_state: StartStop) {
        self.desired_state.send_modify(|x| *x = new_state);
    }

    /// Sets the desired state and waits until the current state matches it.
    pub async fn wait_for_desired(&self, new_state: StartStop) {
        let mut current_state = self.current_state();
        self.to_desired(new_state);
        while *current_state.borrow() != new_state {
            current_state.changed().await.unwrap_or_default();
        }
    }

    /// Getter
    pub fn current_state(&self) -> watch::Receiver<StartStop> {
        self.current_state.subscribe()
    }

    /// Getter
    pub fn desired_state(&self) -> watch::Receiver<StartStop> {
        self.desired_state.subscribe()
    }
}

async fn create_service_manager(
    desired_state: Arc<Sender<StartStop>>,
    seed: Arc<manager_seed::ManagerSeed>,
    current_state: Arc<Sender<StartStop>>,
    persistent_container: Arc<Option<super::persistent_container::PersistentContainer>>,
) {
    let mut desired_state_receiver = desired_state.subscribe();
    let mut running_service: Option<NonDetachingJoinHandle<()>> = None;
    let seed = seed.clone();
    loop {
        let current: StartStop = *current_state.borrow();
        let desired: StartStop = *desired_state_receiver.borrow();
        match (current, desired) {
            (StartStop::Start, StartStop::Start) => (),
            (StartStop::Start, StartStop::Stop) => {
                if persistent_container.is_none() {
                    if let Err(err) = seed.stop_container().await {
                        tracing::error!("Could not stop container");
                        tracing::debug!("{:?}", err)
                    }
                    running_service = None;
                } else if let Some(current_service) = running_service.take() {
                    tokio::select! {
                        _ = current_service => (),
                        _ = tokio::time::sleep(Duration::from_secs_f64(
                            seed.manifest
                                .containers
                                .as_ref()
                                .and_then(|c| c.main.sigterm_timeout)
                                .map(|x| x.as_secs_f64())
                                .unwrap_or_default(),
                        )) => {
                            tracing::error!("Could not stop service");
                        }
                    }
                }
                current_state.send_modify(|x| *x = StartStop::Stop);
            }
            (StartStop::Stop, StartStop::Start) => starting_service(
                current_state.clone(),
                desired_state.clone(),
                seed.clone(),
                persistent_container.clone(),
                &mut running_service,
            ),
            (StartStop::Stop, StartStop::Stop) => (),
        }

        if desired_state_receiver.changed().await.is_err() {
            tracing::error!("Desired state error");
            break;
        }
    }
}

async fn save_state(
    desired_state: Arc<Sender<StartStop>>,
    current_state: Arc<Sender<StartStop>>,
    override_main_status: ManageContainerOverride,
    seed: Arc<manager_seed::ManagerSeed>,
) {
    let mut desired_state_receiver = desired_state.subscribe();
    let mut current_state_receiver = current_state.subscribe();
    let mut override_main_status_receiver = override_main_status.subscribe();
    loop {
        let current: StartStop = *current_state_receiver.borrow();
        let desired: StartStop = *desired_state_receiver.borrow();
        let override_status = override_main_status_receiver.borrow().clone();
        let status = match (override_status.clone(), current, desired) {
            (Some(status), _, _) => status,
            (_, StartStop::Start, StartStop::Start) => MainStatus::Running {
                started: chrono::Utc::now(),
                health: Default::default(),
            },
            (_, StartStop::Start, StartStop::Stop) => MainStatus::Stopping,
            (_, StartStop::Stop, StartStop::Start) => MainStatus::Starting,
            (_, StartStop::Stop, StartStop::Stop) => MainStatus::Stopped,
        };

        let manifest = &seed.manifest;
        if let Err(err) = seed
            .ctx
            .db
            .mutate(|db| set_status(db, manifest, &status))
            .await
        {
            tracing::error!("Did not set status for {}", seed.container_name);
            tracing::debug!("{:?}", err);
        }
        tokio::select! {
            _ = desired_state_receiver.changed() => {},
            _ = current_state_receiver.changed() => {},
            _ = override_main_status_receiver.changed() => {}
        }
    }
}

fn starting_service(
    current_state: Arc<Sender<StartStop>>,
    desired_state: Arc<Sender<StartStop>>,
    seed: Arc<manager_seed::ManagerSeed>,
    persistent_container: ManagerPersistentContainer,
    running_service: &mut Option<NonDetachingJoinHandle<()>>,
) {
    let set_running = {
        let current_state = current_state.clone();
        Arc::new(move || {
            current_state.send_modify(|x| *x = StartStop::Start);
        })
    };
    let set_stopped = { move || current_state.send_modify(|x| *x = StartStop::Stop) };
    let running_main_loop = async move {
        while desired_state.borrow().is_start() {
            let result = run_main(
                seed.clone(),
                persistent_container.clone(),
                set_running.clone(),
            )
            .await;
            set_stopped();
            run_main_log_result(result, seed.clone()).await;
        }
    };
    *running_service = Some(tokio::spawn(running_main_loop).into());
}

async fn run_main_log_result(result: RunMainResult, seed: Arc<manager_seed::ManagerSeed>) {
    match result {
        Ok(Ok(NoOutput)) => (), // restart
        Ok(Err(e)) => {
            tracing::error!(
                "The service {} has crashed with the following exit code: {}",
                seed.manifest.id.clone(),
                e.0
            );

            tokio::time::sleep(Duration::from_secs(15)).await;
        }
        Err(e) => {
            tracing::error!("failed to start service: {}", e);
            tracing::debug!("{:?}", e);
        }
    }
}

/// Used here and in mod.rs (e.g. when taking a backup).
#[instrument(skip(db, manifest))]
pub(super) fn get_status(db: Peeked, manifest: &Manifest) -> MainStatus {
    db.as_package_data()
        .as_idx(&manifest.id)
        .and_then(|x| x.as_installed())
        .filter(|x| x.as_manifest().as_version().de().ok() == Some(manifest.version.clone()))
        .and_then(|x| x.as_status().as_main().de().ok())
        .unwrap_or(MainStatus::Stopped)
}

#[instrument(skip(db, manifest))]
fn set_status(db: &mut Peeked, manifest: &Manifest, main_status: &MainStatus) -> Result<(), Error> {
    let Some(installed) = db
        .as_package_data_mut()
        .as_idx_mut(&manifest.id)
        .or_not_found(&manifest.id)?
        .as_installed_mut()
    else {
        return Ok(());
    };
    installed.as_status_mut().as_main_mut().ser(main_status)
}
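Usage sketch (not part of the diff): set_override plus OverrideGuard give RAII-style status masking. This is how Manager::perform_restart in mod.rs drives it while the service cycles.

// Mask the reported status as Restarting while stopping and restarting.
async fn restart(manage_container: Arc<ManageContainer>) -> Result<(), Error> {
    let guard = manage_container.set_override(MainStatus::Restarting)?;
    manage_container.wait_for_desired(StartStop::Stop).await;  // stop and wait
    manage_container.wait_for_desired(StartStop::Start).await; // start and wait
    guard.drop(); // explicit drop clears the override via OverrideGuard's Drop impl
    Ok(())
}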
96
core/startos/src/manager/manager_map.rs
Normal file
@@ -0,0 +1,96 @@
use std::collections::BTreeMap;
use std::sync::Arc;

use color_eyre::eyre::eyre;
use tokio::sync::RwLock;
use tracing::instrument;

use super::Manager;
use crate::context::RpcContext;
use crate::prelude::*;
use crate::s9pk::manifest::{Manifest, PackageId};
use crate::util::Version;
use crate::Error;

/// The structure containing all the service managers, keyed by package id and version.
#[derive(Default)]
pub struct ManagerMap(RwLock<BTreeMap<(PackageId, Version), Arc<Manager>>>);
impl ManagerMap {
    #[instrument(skip_all)]
    pub async fn init(&self, ctx: RpcContext, peeked: Peeked) -> Result<(), Error> {
        let mut res = BTreeMap::new();
        for package in peeked.as_package_data().keys()? {
            let man: Manifest = if let Some(manifest) = peeked
                .as_package_data()
                .as_idx(&package)
                .and_then(|x| x.as_installed())
                .map(|x| x.as_manifest().de())
            {
                manifest?
            } else {
                continue;
            };

            res.insert(
                (package, man.version.clone()),
                Arc::new(Manager::new(ctx.clone(), man).await?),
            );
        }
        *self.0.write().await = res;
        Ok(())
    }

    /// Used during the install process
    #[instrument(skip_all)]
    pub async fn add(&self, ctx: RpcContext, manifest: Manifest) -> Result<Arc<Manager>, Error> {
        let mut lock = self.0.write().await;
        let id = (manifest.id.clone(), manifest.version.clone());
        if let Some(man) = lock.remove(&id) {
            man.exit().await;
        }
        let manager = Arc::new(Manager::new(ctx.clone(), manifest).await?);
        lock.insert(id, manager.clone());
        Ok(manager)
    }

    /// This is run during cleanup, i.e. when we are uninstalling the service.
    #[instrument(skip_all)]
    pub async fn remove(&self, id: &(PackageId, Version)) {
        if let Some(man) = self.0.write().await.remove(id) {
            man.exit().await;
        }
    }

    /// Used during a shutdown
    #[instrument(skip_all)]
    pub async fn empty(&self) -> Result<(), Error> {
        let res =
            futures::future::join_all(std::mem::take(&mut *self.0.write().await).into_iter().map(
                |((id, version), man)| async move {
                    tracing::debug!("Manager for {}@{} shutting down", id, version);
                    man.shutdown().await?;
                    tracing::debug!("Manager for {}@{} is shutdown", id, version);
                    if let Err(e) = Arc::try_unwrap(man) {
                        tracing::trace!(
                            "Manager for {}@{} still has {} other open references",
                            id,
                            version,
                            Arc::strong_count(&e) - 1
                        );
                    }
                    Ok::<_, Error>(())
                },
            ))
            .await;
        res.into_iter().fold(Ok(()), |res, x| match (res, x) {
            (Ok(()), x) => x,
            (Err(e), Ok(())) => Err(e),
            (Err(e1), Err(e2)) => Err(Error::new(eyre!("{}, {}", e1.source, e2.source), e1.kind)),
        })
    }

    #[instrument(skip_all)]
    pub async fn get(&self, id: &(PackageId, Version)) -> Option<Arc<Manager>> {
        self.0.read().await.get(id).cloned()
    }
}
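Usage sketch (not part of the diff): a minimal lifecycle showing how the rest of the codebase is expected to drive ManagerMap, assuming an RpcContext and an installed Manifest are already in hand.

async fn lifecycle(map: &ManagerMap, ctx: RpcContext, manifest: Manifest) -> Result<(), Error> {
    let key = (manifest.id.clone(), manifest.version.clone());
    let manager = map.add(ctx, manifest).await?; // install: replaces any old manager
    manager.start().await;                       // drive the service
    assert!(map.get(&key).await.is_some());      // look up by (id, version)
    map.remove(&key).await;                      // uninstall: exits the manager
    Ok(())
}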
37
core/startos/src/manager/manager_seed.rs
Normal file
@@ -0,0 +1,37 @@
use models::ErrorKind;

use crate::context::RpcContext;
use crate::procedure::docker::DockerProcedure;
use crate::procedure::PackageProcedure;
use crate::s9pk::manifest::Manifest;
use crate::util::docker::stop_container;
use crate::Error;

/// Helper structure for a service: the seed of data needed by the manager_container.
pub struct ManagerSeed {
    pub ctx: RpcContext,
    pub manifest: Manifest,
    pub container_name: String,
}

impl ManagerSeed {
    pub async fn stop_container(&self) -> Result<(), Error> {
        match stop_container(
            &self.container_name,
            match &self.manifest.main {
                PackageProcedure::Docker(DockerProcedure {
                    sigterm_timeout: Some(sigterm_timeout),
                    ..
                }) => Some(**sigterm_timeout),
                _ => None,
            },
            None,
        )
        .await
        {
            Err(e) if e.kind == ErrorKind::NotFound => (), // Already stopped
            a => a?,
        }
        Ok(())
    }
}
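For reference, mod.rs builds this seed once per service in Manager::new (shown in the next file); the docker container name is derived from the package id. A minimal sketch of that construction:

fn make_seed(ctx: RpcContext, manifest: Manifest) -> Arc<ManagerSeed> {
    Arc::new(ManagerSeed {
        ctx,
        // the container name is derived from the package id
        container_name: DockerProcedure::container_name(&manifest.id, None),
        manifest,
    })
}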
888
core/startos/src/manager/mod.rs
Normal file
@@ -0,0 +1,888 @@
use std::collections::{BTreeMap, BTreeSet};
use std::net::Ipv4Addr;
use std::sync::Arc;
use std::task::Poll;
use std::time::Duration;

use color_eyre::eyre::eyre;
use container_init::ProcessGroupId;
use futures::future::BoxFuture;
use futures::{Future, FutureExt, TryFutureExt};
use helpers::UnixRpcClient;
use models::{ErrorKind, OptionExt, PackageId};
use nix::sys::signal::Signal;
use persistent_container::PersistentContainer;
use rand::SeedableRng;
use sqlx::Connection;
use start_stop::StartStop;
use tokio::sync::watch::{self, Sender};
use tokio::sync::{oneshot, Mutex};
use tracing::instrument;
use transition_state::TransitionState;

use crate::backup::target::PackageBackupInfo;
use crate::backup::PackageBackupReport;
use crate::config::action::ConfigRes;
use crate::config::spec::ValueSpecPointer;
use crate::config::ConfigureContext;
use crate::context::RpcContext;
use crate::db::model::{CurrentDependencies, CurrentDependencyInfo};
use crate::dependencies::{
    add_dependent_to_current_dependents_lists, compute_dependency_config_errs,
};
use crate::disk::mount::backup::BackupMountGuard;
use crate::disk::mount::guard::TmpMountGuard;
use crate::install::cleanup::remove_from_current_dependents_lists;
use crate::net::net_controller::NetService;
use crate::net::vhost::AlpnInfo;
use crate::prelude::*;
use crate::procedure::docker::{DockerContainer, DockerProcedure, LongRunning};
use crate::procedure::{NoOutput, ProcedureName};
use crate::s9pk::manifest::Manifest;
use crate::status::MainStatus;
use crate::util::docker::{get_container_ip, kill_container};
use crate::util::NonDetachingJoinHandle;
use crate::volume::Volume;
use crate::Error;

pub mod health;
mod manager_container;
mod manager_map;
pub mod manager_seed;
mod persistent_container;
mod start_stop;
mod transition_state;

pub use manager_map::ManagerMap;

use self::manager_container::{get_status, ManageContainer};
use self::manager_seed::ManagerSeed;

pub const HEALTH_CHECK_COOLDOWN_SECONDS: u64 = 15;
pub const HEALTH_CHECK_GRACE_PERIOD_SECONDS: u64 = 5;

type ManagerPersistentContainer = Arc<Option<PersistentContainer>>;
type BackupGuard = Arc<Mutex<BackupMountGuard<TmpMountGuard>>>;
pub enum BackupReturn {
    Error(Error),
    AlreadyRunning(PackageBackupReport),
    Ran {
        report: PackageBackupReport,
        res: Result<PackageBackupInfo, Error>,
    },
}

pub struct Gid {
    next_gid: (watch::Sender<u32>, watch::Receiver<u32>),
    main_gid: (
        watch::Sender<ProcessGroupId>,
        watch::Receiver<ProcessGroupId>,
    ),
}

impl Default for Gid {
    fn default() -> Self {
        Self {
            next_gid: watch::channel(1),
            main_gid: watch::channel(ProcessGroupId(1)),
        }
    }
}
impl Gid {
    pub fn new_gid(&self) -> ProcessGroupId {
        let mut previous = 0;
        self.next_gid.0.send_modify(|x| {
            previous = *x;
            *x = previous + 1;
        });
        ProcessGroupId(previous)
    }

    pub fn new_main_gid(&self) -> ProcessGroupId {
        let gid = self.new_gid();
        self.main_gid.0.send(gid).unwrap_or_default();
        gid
    }
}

/// The controller for a service. This is where we control a service: start, stop, restart, etc.
#[derive(Clone)]
pub struct Manager {
    seed: Arc<ManagerSeed>,

    manage_container: Arc<manager_container::ManageContainer>,
    transition: Arc<watch::Sender<TransitionState>>,
    persistent_container: ManagerPersistentContainer,

    pub gid: Arc<Gid>,
}
impl Manager {
    pub async fn new(ctx: RpcContext, manifest: Manifest) -> Result<Self, Error> {
        let seed = Arc::new(ManagerSeed {
            ctx,
            container_name: DockerProcedure::container_name(&manifest.id, None),
            manifest,
        });

        let persistent_container = Arc::new(PersistentContainer::init(&seed).await?);
        let manage_container = Arc::new(
            manager_container::ManageContainer::new(seed.clone(), persistent_container.clone())
                .await?,
        );
        let (transition, _) = watch::channel(Default::default());
        let transition = Arc::new(transition);
        Ok(Self {
            seed,
            manage_container,
            transition,
            persistent_container,
            gid: Default::default(),
        })
    }

    /// Awaiting this does not wait for the start to complete.
    pub async fn start(&self) {
        if self._is_transition_restart() {
            return;
        }
        self._transition_abort().await;
        self.manage_container.to_desired(StartStop::Start);
    }

    /// Awaiting this does not wait for the stop to complete.
    pub async fn stop(&self) {
        self._transition_abort().await;
        self.manage_container.to_desired(StartStop::Stop);
    }
    /// Awaiting this does not wait for the restart to complete.
    pub async fn restart(&self) {
        if self._is_transition_restart()
            && *self.manage_container.desired_state().borrow() == StartStop::Stop
        {
            return;
        }
        if self.manage_container.desired_state().borrow().is_start() {
            self._transition_replace(self._transition_restart()).await;
        }
    }
    /// Awaiting this does not wait for the restart to complete.
    pub async fn configure(
        &self,
        configure_context: ConfigureContext,
    ) -> Result<BTreeMap<PackageId, String>, Error> {
        if self._is_transition_restart() {
            self._transition_abort().await;
        } else if self._is_transition_backup() {
            return Err(Error::new(
                eyre!("Can't configure because service is backing up"),
                ErrorKind::InvalidRequest,
            ));
        }
        let context = self.seed.ctx.clone();
        let id = self.seed.manifest.id.clone();

        let breakages = configure(context, id, configure_context).await?;

        self.restart().await;

        Ok(breakages)
    }

    /// Awaiting this does not wait for the backup to complete.
    pub async fn backup(&self, backup_guard: BackupGuard) -> BackupReturn {
        if self._is_transition_backup() {
            return BackupReturn::AlreadyRunning(PackageBackupReport {
                error: Some("Can't do backup because service is already backing up".to_owned()),
            });
        }
        let (transition_state, done) = self._transition_backup(backup_guard);
        self._transition_replace(transition_state).await;
        done.await
    }
    pub async fn exit(&self) {
        self._transition_abort().await;
        self.manage_container
            .wait_for_desired(StartStop::Stop)
            .await;
    }

    /// A special exit that overrides the start state; should only be called during shutdown, when we remove the other containers.
    async fn shutdown(&self) -> Result<(), Error> {
        self.manage_container.lock_state_forever(&self.seed).await?;

        self.exit().await;
        Ok(())
    }

    /// Used when we want to shut down the service.
    pub async fn signal(&self, signal: Signal) -> Result<(), Error> {
        let gid = self.gid.clone();
        send_signal(self, gid, signal).await
    }

    /// Used as a getter, but also used in procedures.
    pub fn rpc_client(&self) -> Option<Arc<UnixRpcClient>> {
        (*self.persistent_container)
            .as_ref()
            .map(|x| x.rpc_client())
    }

    async fn _transition_abort(&self) {
        self.transition
            .send_replace(Default::default())
            .abort()
            .await;
    }
    async fn _transition_replace(&self, transition_state: TransitionState) {
        self.transition.send_replace(transition_state).abort().await;
    }

    pub(super) fn perform_restart(&self) -> impl Future<Output = Result<(), Error>> + 'static {
        let manage_container = self.manage_container.clone();
        async move {
            let restart_override = manage_container.set_override(MainStatus::Restarting)?;
            manage_container.wait_for_desired(StartStop::Stop).await;
            manage_container.wait_for_desired(StartStop::Start).await;
            restart_override.drop();
            Ok(())
        }
    }
    fn _transition_restart(&self) -> TransitionState {
        let transition = self.transition.clone();
        let restart = self.perform_restart();
        TransitionState::Restarting(
            tokio::spawn(async move {
                if let Err(err) = restart.await {
                    tracing::error!("Error restarting service: {}", err);
                }
                transition.send_replace(Default::default());
            })
            .into(),
        )
    }
    fn perform_backup(
        &self,
        backup_guard: BackupGuard,
    ) -> impl Future<Output = Result<Result<PackageBackupInfo, Error>, Error>> {
        let manage_container = self.manage_container.clone();
        let seed = self.seed.clone();
        async move {
            let peek = seed.ctx.db.peek().await;
            let state_reverter = DesiredStateReverter::new(manage_container.clone());
            let override_guard =
                manage_container.set_override(get_status(peek, &seed.manifest).backing_up())?;
            manage_container.wait_for_desired(StartStop::Stop).await;
            let backup_guard = backup_guard.lock().await;
            let guard = backup_guard.mount_package_backup(&seed.manifest.id).await?;

            let return_value = seed.manifest.backup.create(seed.clone()).await;
            guard.unmount().await?;
            drop(backup_guard);

            let manifest_id = seed.manifest.id.clone();
            seed.ctx
                .db
                .mutate(|db| {
                    if let Some(progress) = db
                        .as_server_info_mut()
                        .as_status_info_mut()
                        .as_backup_progress_mut()
                        .transpose_mut()
                        .and_then(|p| p.as_idx_mut(&manifest_id))
                    {
                        progress.as_complete_mut().ser(&true)?;
                    }
                    Ok(())
                })
                .await?;

            state_reverter.revert().await;

            override_guard.drop();
            Ok::<_, Error>(return_value)
        }
    }
    fn _transition_backup(
        &self,
        backup_guard: BackupGuard,
    ) -> (TransitionState, BoxFuture<BackupReturn>) {
        let (send, done) = oneshot::channel();

        let transition_state = self.transition.clone();
        (
            TransitionState::BackingUp(
                tokio::spawn(
                    self.perform_backup(backup_guard)
                        .then(finish_up_backup_task(transition_state, send)),
                )
                .into(),
            ),
            done.map_err(|err| Error::new(eyre!("Oneshot error: {err:?}"), ErrorKind::Unknown))
                .map(flatten_backup_error)
                .boxed(),
        )
    }
    fn _is_transition_restart(&self) -> bool {
        let transition = self.transition.borrow();
        matches!(*transition, TransitionState::Restarting(_))
    }
    fn _is_transition_backup(&self) -> bool {
        let transition = self.transition.borrow();
        matches!(*transition, TransitionState::BackingUp(_))
    }
}

#[instrument(skip_all)]
async fn configure(
    ctx: RpcContext,
    id: PackageId,
    mut configure_context: ConfigureContext,
) -> Result<BTreeMap<PackageId, String>, Error> {
    let db = ctx.db.peek().await;
    let id = &id;
    let ctx = &ctx;
    let overrides = &mut configure_context.overrides;
    // fetch data from db
    let manifest = db
        .as_package_data()
        .as_idx(id)
        .or_not_found(id)?
        .as_manifest()
        .de()?;

    // get current config and current spec
    let ConfigRes {
        config: old_config,
        spec,
    } = manifest
        .config
        .as_ref()
        .or_not_found("Manifest config")?
        .get(ctx, id, &manifest.version, &manifest.volumes)
        .await?;

    // determine new config to use
    let mut config = if let Some(config) = configure_context.config.or_else(|| old_config.clone()) {
        config
    } else {
        spec.gen(
            &mut rand::rngs::StdRng::from_entropy(),
            &configure_context.timeout,
        )?
    };

    spec.validate(&manifest)?;
    spec.matches(&config)?; // check that new config matches spec

    // TODO Commit or not?
    spec.update(ctx, &manifest, overrides, &mut config).await?; // dereference pointers in the new config

    let manifest = db
        .as_package_data()
        .as_idx(id)
        .or_not_found(id)?
        .as_installed()
        .or_not_found(id)?
        .as_manifest()
        .de()?;

    let dependencies = &manifest.dependencies;
    let mut current_dependencies: CurrentDependencies = CurrentDependencies(
        dependencies
            .0
            .iter()
            .filter_map(|(id, info)| {
                if info.requirement.required() {
                    Some((id.clone(), CurrentDependencyInfo::default()))
                } else {
                    None
                }
            })
            .collect(),
    );
    for ptr in spec.pointers(&config)? {
        match ptr {
            ValueSpecPointer::Package(pkg_ptr) => {
                if let Some(info) = current_dependencies.0.get_mut(pkg_ptr.package_id()) {
                    info.pointers.insert(pkg_ptr);
                } else {
                    let id = pkg_ptr.package_id().to_owned();
                    let mut pointers = BTreeSet::new();
                    pointers.insert(pkg_ptr);
                    current_dependencies.0.insert(
                        id,
                        CurrentDependencyInfo {
                            pointers,
                            health_checks: BTreeSet::new(),
                        },
                    );
                }
            }
            ValueSpecPointer::System(_) => (),
        }
    }

    let action = manifest.config.as_ref().or_not_found(id)?;
    let version = &manifest.version;
    let volumes = &manifest.volumes;
    if !configure_context.dry_run {
        // run config action
        let res = action
            .set(ctx, id, version, &dependencies, volumes, &config)
            .await?;

        // track dependencies with no pointers
        for (package_id, health_checks) in res.depends_on.into_iter() {
            if let Some(current_dependency) = current_dependencies.0.get_mut(&package_id) {
                current_dependency.health_checks.extend(health_checks);
            } else {
                current_dependencies.0.insert(
                    package_id,
                    CurrentDependencyInfo {
                        pointers: BTreeSet::new(),
                        health_checks,
                    },
                );
            }
        }

        // track dependency health checks
        current_dependencies = current_dependencies.map(|x| {
            x.into_iter()
                .filter(|(dep_id, _)| {
                    if dep_id != id && !manifest.dependencies.0.contains_key(dep_id) {
                        tracing::warn!("Illegal dependency specified: {}", dep_id);
                        false
                    } else {
                        true
                    }
                })
                .collect()
        });
    }

    let dependency_config_errs =
        compute_dependency_config_errs(&ctx, &db, &manifest, &current_dependencies, overrides)
            .await?;

    // cache current config for dependents
    configure_context
        .overrides
        .insert(id.clone(), config.clone());

    // handle dependents

    let dependents = db
        .as_package_data()
        .as_idx(id)
        .or_not_found(id)?
        .as_installed()
        .or_not_found(id)?
        .as_current_dependents()
        .de()?;
    for (dependent, _dep_info) in dependents.0.iter().filter(|(dep_id, _)| dep_id != &id) {
        // check if config passes dependent check
        if let Some(cfg) = db
            .as_package_data()
            .as_idx(dependent)
            .or_not_found(dependent)?
            .as_installed()
            .or_not_found(dependent)?
            .as_manifest()
            .as_dependencies()
            .as_idx(id)
            .or_not_found(id)?
            .as_config()
            .de()?
        {
            let manifest = db
                .as_package_data()
                .as_idx(dependent)
                .or_not_found(dependent)?
                .as_installed()
                .or_not_found(dependent)?
                .as_manifest()
                .de()?;
            if let Err(error) = cfg
                .check(
                    ctx,
                    dependent,
                    &manifest.version,
                    &manifest.volumes,
                    id,
                    &config,
                )
                .await?
            {
                configure_context.breakages.insert(dependent.clone(), error);
            }
        }
    }

    if !configure_context.dry_run {
        return ctx
            .db
            .mutate(move |db| {
                remove_from_current_dependents_lists(db, id, &current_dependencies)?;
                add_dependent_to_current_dependents_lists(db, id, &current_dependencies)?;
                current_dependencies.0.remove(id);
                for (dep, errs) in db
                    .as_package_data_mut()
                    .as_entries_mut()?
                    .into_iter()
                    .filter_map(|(id, pde)| {
                        pde.as_installed_mut()
                            .map(|i| (id, i.as_status_mut().as_dependency_config_errors_mut()))
                    })
                {
                    errs.remove(id)?;
                    if let Some(err) = configure_context.breakages.get(&dep) {
                        errs.insert(id, err)?;
                    }
                }
                let installed = db
                    .as_package_data_mut()
                    .as_idx_mut(id)
                    .or_not_found(id)?
                    .as_installed_mut()
                    .or_not_found(id)?;
                installed
                    .as_current_dependencies_mut()
                    .ser(&current_dependencies)?;
                let status = installed.as_status_mut();
                status.as_configured_mut().ser(&true)?;
                status
                    .as_dependency_config_errors_mut()
                    .ser(&dependency_config_errs)?;
                Ok(configure_context.breakages)
            })
            .await; // add new
    }

    Ok(configure_context.breakages)
}

struct DesiredStateReverter {
    manage_container: Option<Arc<ManageContainer>>,
    starting_state: StartStop,
}
impl DesiredStateReverter {
    fn new(manage_container: Arc<ManageContainer>) -> Self {
        let starting_state = *manage_container.desired_state().borrow();
        let manage_container = Some(manage_container);
        Self {
            starting_state,
            manage_container,
        }
    }
    async fn revert(mut self) {
        if let Some(mut current_state) = self._revert() {
            while *current_state.borrow() != self.starting_state {
                current_state.changed().await.unwrap();
            }
        }
    }
    fn _revert(&mut self) -> Option<watch::Receiver<StartStop>> {
        if let Some(manage_container) = self.manage_container.take() {
            manage_container.to_desired(self.starting_state);

            return Some(manage_container.desired_state());
        }
        None
    }
}
impl Drop for DesiredStateReverter {
    fn drop(&mut self) {
        self._revert();
    }
}

type BackupDoneSender = oneshot::Sender<Result<PackageBackupInfo, Error>>;

fn finish_up_backup_task(
    transition: Arc<Sender<TransitionState>>,
    send: BackupDoneSender,
) -> impl FnOnce(Result<Result<PackageBackupInfo, Error>, Error>) -> BoxFuture<'static, ()> {
    move |result| {
        async move {
            transition.send_replace(Default::default());
            send.send(match result {
                Ok(a) => a,
                Err(e) => Err(e),
            })
            .unwrap_or_default();
        }
        .boxed()
    }
}

fn response_to_report(response: &Result<PackageBackupInfo, Error>) -> PackageBackupReport {
    PackageBackupReport {
        error: response.as_ref().err().map(|e| e.to_string()),
    }
}
fn flatten_backup_error(input: Result<Result<PackageBackupInfo, Error>, Error>) -> BackupReturn {
    match input {
        Ok(a) => BackupReturn::Ran {
            report: response_to_report(&a),
            res: a,
        },
        Err(err) => BackupReturn::Error(err),
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Status {
    Starting,
    Running,
    Stopped,
    Paused,
    Shutdown,
}

#[derive(Debug, Clone, Copy)]
pub enum OnStop {
    Restart,
    Sleep,
    Exit,
}

type RunMainResult = Result<Result<NoOutput, (i32, String)>, Error>;

#[instrument(skip_all)]
async fn run_main(
    seed: Arc<ManagerSeed>,
    persistent_container: ManagerPersistentContainer,
    started: Arc<impl Fn()>,
) -> RunMainResult {
    let mut runtime = NonDetachingJoinHandle::from(tokio::spawn(start_up_image(seed.clone())));
    let ip = match persistent_container.is_some() {
        false => Some(match get_running_ip(&seed, &mut runtime).await {
            GetRunningIp::Ip(x) => x,
            GetRunningIp::Error(e) => return Err(e),
            GetRunningIp::EarlyExit(x) => return Ok(x),
        }),
        true => None,
    };

    let svc = if let Some(ip) = ip {
        let net = add_network_for_main(&seed, ip).await?;
        started();
        Some(net)
    } else {
        None
    };

    let health = main_health_check_daemon(seed.clone());
    let res = tokio::select! {
        a = runtime => a.map_err(|_| Error::new(eyre!("Manager runtime panicked!"), crate::ErrorKind::Docker)).and_then(|a| a),
        _ = health => Err(Error::new(eyre!("Health check daemon exited!"), crate::ErrorKind::Unknown))
    };
    if let Some(svc) = svc {
        remove_network_for_main(svc).await?;
    }
    res
}

/// We want to start up the main image from the manifest, but in this case we also want to know that the certificates have been generated.
/// Note for _generated_certificate: needed to know that we have generated the certificate before we start the state.
async fn start_up_image(seed: Arc<ManagerSeed>) -> Result<Result<NoOutput, (i32, String)>, Error> {
    seed.manifest
        .main
        .execute::<(), NoOutput>(
            &seed.ctx,
            &seed.manifest.id,
            &seed.manifest.version,
            ProcedureName::Main,
            &seed.manifest.volumes,
            None,
            None,
        )
        .await
}

async fn long_running_docker(
    seed: &ManagerSeed,
    container: &DockerContainer,
) -> Result<(LongRunning, UnixRpcClient), Error> {
    container
        .long_running_execute(
            &seed.ctx,
            &seed.manifest.id,
            &seed.manifest.version,
            &seed.manifest.volumes,
        )
        .await
}

enum GetRunningIp {
    Ip(Ipv4Addr),
    Error(Error),
    EarlyExit(Result<NoOutput, (i32, String)>),
}

async fn get_long_running_ip(seed: &ManagerSeed, runtime: &mut LongRunning) -> GetRunningIp {
    loop {
        match get_container_ip(&seed.container_name).await {
            Ok(Some(ip_addr)) => return GetRunningIp::Ip(ip_addr),
            Ok(None) => (),
            Err(e) if e.kind == ErrorKind::NotFound => (),
            Err(e) => return GetRunningIp::Error(e),
        }
        if let Poll::Ready(res) = futures::poll!(&mut runtime.running_output) {
            match res {
                Ok(_) => return GetRunningIp::EarlyExit(Ok(NoOutput)),
                Err(_e) => {
                    return GetRunningIp::Error(Error::new(
                        eyre!("Manager runtime panicked!"),
                        crate::ErrorKind::Docker,
                    ))
                }
            }
        }
    }
}

#[instrument(skip(seed))]
async fn add_network_for_main(
    seed: &ManagerSeed,
    ip: std::net::Ipv4Addr,
) -> Result<NetService, Error> {
    let mut svc = seed
        .ctx
        .net_controller
        .create_service(seed.manifest.id.clone(), ip)
        .await?;
    // DEPRECATED
    let mut secrets = seed.ctx.secret_store.acquire().await?;
    let mut tx = secrets.begin().await?;
    for (id, interface) in &seed.manifest.interfaces.0 {
        for (external, internal) in interface.lan_config.iter().flatten() {
            svc.add_lan(
                tx.as_mut(),
                id.clone(),
                external.0,
                internal.internal,
                Err(AlpnInfo::Specified(vec![])),
            )
            .await?;
        }
        for (external, internal) in interface.tor_config.iter().flat_map(|t| &t.port_mapping) {
            svc.add_tor(tx.as_mut(), id.clone(), external.0, internal.0)
                .await?;
        }
    }
    for volume in seed.manifest.volumes.values() {
        if let Volume::Certificate { interface_id } = volume {
            svc.export_cert(tx.as_mut(), interface_id, ip.into())
                .await?;
        }
    }
    tx.commit().await?;
    Ok(svc)
}

#[instrument(skip(svc))]
async fn remove_network_for_main(svc: NetService) -> Result<(), Error> {
    svc.remove_all().await
}

async fn main_health_check_daemon(seed: Arc<ManagerSeed>) {
    tokio::time::sleep(Duration::from_secs(HEALTH_CHECK_GRACE_PERIOD_SECONDS)).await;
    loop {
        if let Err(e) = health::check(&seed.ctx, &seed.manifest.id).await {
            tracing::error!(
                "Failed to run health check for {}: {}",
                &seed.manifest.id,
                e
            );
            tracing::debug!("{:?}", e);
        }
        tokio::time::sleep(Duration::from_secs(HEALTH_CHECK_COOLDOWN_SECONDS)).await;
    }
}

type RuntimeOfCommand = NonDetachingJoinHandle<Result<Result<NoOutput, (i32, String)>, Error>>;

#[instrument(skip(seed, runtime))]
async fn get_running_ip(seed: &ManagerSeed, mut runtime: &mut RuntimeOfCommand) -> GetRunningIp {
    loop {
        match get_container_ip(&seed.container_name).await {
            Ok(Some(ip_addr)) => return GetRunningIp::Ip(ip_addr),
            Ok(None) => (),
            Err(e) if e.kind == ErrorKind::NotFound => (),
            Err(e) => return GetRunningIp::Error(e),
        }
        if let Poll::Ready(res) = futures::poll!(&mut runtime) {
            match res {
                Ok(Ok(response)) => return GetRunningIp::EarlyExit(response),
                Err(e) => {
                    return GetRunningIp::Error(Error::new(
                        match e.try_into_panic() {
                            Ok(e) => {
                                eyre!(
                                    "Manager runtime panicked: {}",
                                    e.downcast_ref::<&'static str>().unwrap_or(&"UNKNOWN")
                                )
                            }
                            _ => eyre!("Manager runtime cancelled!"),
                        },
                        crate::ErrorKind::Docker,
                    ))
                }
                Ok(Err(e)) => {
                    return GetRunningIp::Error(Error::new(
                        eyre!("Manager runtime returned error: {}", e),
                        crate::ErrorKind::Docker,
                    ))
                }
            }
        }
    }
}

async fn send_signal(manager: &Manager, gid: Arc<Gid>, signal: Signal) -> Result<(), Error> {
    // stop health checks from committing their results
    // shared
    //     .commit_health_check_results
    //     .store(false, Ordering::SeqCst);

    if let Some(rpc_client) = manager.rpc_client() {
        let main_gid = *gid.main_gid.0.borrow();
        let next_gid = gid.new_gid();
        #[cfg(feature = "js-engine")]
        if let Err(e) = crate::procedure::js_scripts::JsProcedure::default()
            .execute::<_, NoOutput>(
                &manager.seed.ctx.datadir,
                &manager.seed.manifest.id,
                &manager.seed.manifest.version,
                ProcedureName::Signal,
                &manager.seed.manifest.volumes,
                Some(container_init::SignalGroupParams {
                    gid: main_gid,
                    signal: signal as u32,
                }),
                None, // TODO
                next_gid,
                Some(rpc_client),
            )
            .await?
        {
            tracing::error!("Failed to send js signal: {}", e.1);
            tracing::debug!("{:?}", e);
        }
    } else {
        // send signal to container
        kill_container(&manager.seed.container_name, Some(signal))
            .await
            .or_else(|e| {
                if e.kind == ErrorKind::NotFound {
                    Ok(())
                } else {
                    Err(e)
                }
            })?;
    }

    Ok(())
}
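Usage sketch (not part of the diff): the Manager facade as a caller would drive it, assuming an RpcContext, a Manifest, and a BackupGuard from the backup flow are in hand.

async fn example(ctx: RpcContext, manifest: Manifest, backup_guard: BackupGuard) -> Result<(), Error> {
    let manager = Manager::new(ctx, manifest).await?;
    manager.start().await;   // returns immediately; the start completes in the background
    manager.restart().await; // no-op if a restart transition is already in flight
    match manager.backup(backup_guard).await {
        BackupReturn::Ran { res, .. } => res.map(|_| ())?,
        BackupReturn::AlreadyRunning(report) => tracing::warn!("{:?}", report.error),
        BackupReturn::Error(e) => return Err(e),
    }
    manager.exit().await;    // waits for the service to actually stop
    Ok(())
}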
101
core/startos/src/manager/persistent_container.rs
Normal file
@@ -0,0 +1,101 @@
use std::sync::Arc;
use std::time::Duration;

use color_eyre::eyre::eyre;
use helpers::UnixRpcClient;
use tokio::sync::oneshot;
use tokio::sync::watch::{self, Receiver};
use tracing::instrument;

use super::manager_seed::ManagerSeed;
use super::{
    add_network_for_main, get_long_running_ip, long_running_docker, remove_network_for_main,
    GetRunningIp,
};
use crate::procedure::docker::DockerContainer;
use crate::util::NonDetachingJoinHandle;
use crate::Error;

/// Persistent containers are the old containers that need to run all the time.
/// The goal is that all services will be persistent containers, waiting to run the main system.
pub struct PersistentContainer {
    _running_docker: NonDetachingJoinHandle<()>,
    pub rpc_client: Receiver<Arc<UnixRpcClient>>,
}

impl PersistentContainer {
    #[instrument(skip_all)]
    pub async fn init(seed: &Arc<ManagerSeed>) -> Result<Option<Self>, Error> {
        Ok(if let Some(containers) = &seed.manifest.containers {
            let (running_docker, rpc_client) =
                spawn_persistent_container(seed.clone(), containers.main.clone()).await?;
            Some(Self {
                _running_docker: running_docker,
                rpc_client,
            })
        } else {
            None
        })
    }

    pub fn rpc_client(&self) -> Arc<UnixRpcClient> {
        self.rpc_client.borrow().clone()
    }
}

pub async fn spawn_persistent_container(
    seed: Arc<ManagerSeed>,
    container: DockerContainer,
) -> Result<(NonDetachingJoinHandle<()>, Receiver<Arc<UnixRpcClient>>), Error> {
    let (send_inserter, inserter) = oneshot::channel();
    Ok((
        tokio::task::spawn(async move {
            let mut inserter_send: Option<watch::Sender<Arc<UnixRpcClient>>> = None;
            let mut send_inserter: Option<oneshot::Sender<Receiver<Arc<UnixRpcClient>>>> =
                Some(send_inserter);
            loop {
                if let Err(e) = async {
                    let (mut runtime, inserter) =
                        long_running_docker(&seed, &container).await?;

                    let ip = match get_long_running_ip(&seed, &mut runtime).await {
                        GetRunningIp::Ip(x) => x,
                        GetRunningIp::Error(e) => return Err(e),
                        GetRunningIp::EarlyExit(e) => {
                            tracing::error!("Early Exit");
                            tracing::debug!("{:?}", e);
                            return Ok(());
                        }
                    };
                    let svc = add_network_for_main(&seed, ip).await?;

                    if let Some(inserter_send) = inserter_send.as_mut() {
                        let _ = inserter_send.send(Arc::new(inserter));
                    } else {
                        let (s, r) = watch::channel(Arc::new(inserter));
                        inserter_send = Some(s);
                        if let Some(send_inserter) = send_inserter.take() {
                            let _ = send_inserter.send(r);
                        }
                    }

                    let res = tokio::select! {
                        a = runtime.running_output => a.map_err(|_| Error::new(eyre!("Manager runtime panicked!"), crate::ErrorKind::Docker)).map(|_| ()),
                    };

                    remove_network_for_main(svc).await?;

                    res
                }.await {
                    tracing::error!("Error in persistent container: {}", e);
                    tracing::debug!("{:?}", e);
                } else {
                    break;
                }
                tokio::time::sleep(Duration::from_millis(200)).await;
            }
        })
        .into(),
        inserter.await.map_err(|_| Error::new(eyre!("Container handle dropped before inserter sent"), crate::ErrorKind::Unknown))?,
    ))
}
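Usage sketch (not part of the diff): a persistent container only exists when the manifest declares `containers`, and the rpc_client watch channel always holds the client for the most recent container incarnation.

async fn example(seed: Arc<ManagerSeed>) -> Result<(), Error> {
    if let Some(pc) = PersistentContainer::init(&seed).await? {
        // Arc<UnixRpcClient> for the currently live container
        let _client = pc.rpc_client();
        // ... issue RPCs against the long-running container here ...
    }
    Ok(())
}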
32
core/startos/src/manager/start_stop.rs
Normal file
@@ -0,0 +1,32 @@
use crate::status::MainStatus;

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StartStop {
    Start,
    Stop,
}

impl StartStop {
    pub(crate) fn is_start(&self) -> bool {
        matches!(self, StartStop::Start)
    }
}
impl From<MainStatus> for StartStop {
    fn from(value: MainStatus) -> Self {
        match value {
            MainStatus::Stopped => StartStop::Stop,
            MainStatus::Restarting => StartStop::Start,
            MainStatus::Stopping => StartStop::Stop,
            MainStatus::Starting => StartStop::Start,
            MainStatus::Running {
                started: _,
                health: _,
            } => StartStop::Start,
            MainStatus::BackingUp { started, health: _ } if started.is_some() => StartStop::Start,
            MainStatus::BackingUp {
                started: _,
                health: _,
            } => StartStop::Stop,
        }
    }
}
35
core/startos/src/manager/transition_state.rs
Normal file
@@ -0,0 +1,35 @@
use helpers::NonDetachingJoinHandle;

/// Used only in manager/mod to keep track of the manager's state during
/// the transitional states.
pub(super) enum TransitionState {
    BackingUp(NonDetachingJoinHandle<()>),
    Restarting(NonDetachingJoinHandle<()>),
    None,
}

impl TransitionState {
    pub(super) fn take(&mut self) -> Self {
        std::mem::take(self)
    }
    pub(super) fn into_join_handle(self) -> Option<NonDetachingJoinHandle<()>> {
        Some(match self {
            TransitionState::BackingUp(a) => a,
            TransitionState::Restarting(a) => a,
            TransitionState::None => return None,
        })
    }
    pub(super) async fn abort(&mut self) {
        if let Some(s) = self.take().into_join_handle() {
            if s.wait_for_abort().await.is_ok() {
                tracing::trace!("transition completed before abort");
            }
        }
    }
}

impl Default for TransitionState {
    fn default() -> Self {
        TransitionState::None
    }
}
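Usage sketch (not part of the diff): how Manager aborts an in-flight transition in mod.rs above. send_replace swaps in the Default (None) and returns the old state, whose join handle is then aborted; take works the same way via std::mem::take, so the handle can only be consumed once.

async fn abort_transition(transition: &tokio::sync::watch::Sender<TransitionState>) {
    // aborts a BackingUp or Restarting transition if one is in flight
    transition.send_replace(Default::default()).abort().await;
}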