diff --git a/agent/config/restarter.sh b/agent/config/restarter.sh index 6f1408133..f0045d54a 100644 --- a/agent/config/restarter.sh +++ b/agent/config/restarter.sh @@ -1,18 +1,3 @@ #!/bin/bash -for CONTAINER in $(docker ps -aq); do - EXIT=`docker inspect -f "{{ .State.ExitCode }}" $CONTAINER` - if [ $EXIT -eq 0 ]; then - continue - fi - if [ $EXIT -eq 143 ]; then - continue - fi - if [ $EXIT -eq 137 ]; then - OOM=`docker inspect -f "{{ .State.OOMKilled }}" $CONTAINER` - if [ "$OOM" == "false" ]; then - continue - fi - fi - docker start $CONTAINER -done \ No newline at end of file +exec appmgr repair-app-status \ No newline at end of file diff --git a/appmgr/src/control.rs b/appmgr/src/control.rs index 72e464ca8..f4c1d0381 100644 --- a/appmgr/src/control.rs +++ b/appmgr/src/control.rs @@ -1,9 +1,10 @@ use std::path::Path; use futures::future::{BoxFuture, FutureExt}; -use linear_map::LinearMap; +use linear_map::{set::LinearSet, LinearMap}; use crate::dependencies::{DependencyError, TaggedDependencyError}; +use crate::util::{from_yaml_async_reader, PersistencePath, YamlUpdateHandle}; use crate::Error; pub async fn start_app(name: &str, update_metadata: bool) -> Result<(), Error> { @@ -38,6 +39,12 @@ pub async fn start_app(name: &str, update_metadata: bool) -> Result<(), Error> { "Failed to Start Application: {}", std::str::from_utf8(&output.stderr).unwrap_or("Unknown Error") ); + let mut running = YamlUpdateHandle::<LinearSet<String>>::new_or_default( + PersistencePath::from_ref("running.yaml"), + ) + .await?; + running.insert(name.to_owned()); + running.commit().await?; } else if status == crate::apps::DockerStatus::Paused { resume_app(name).await?; } @@ -79,6 +86,12 @@ pub async fn stop_app( "Failed to Stop Application: {}", std::str::from_utf8(&output.stderr).unwrap_or("Unknown Error") ); + let mut running = YamlUpdateHandle::<LinearSet<String>>::new_or_default( + PersistencePath::from_ref("running.yaml"), + ) + .await?; + running.remove(name); + running.commit().await?; 
crate::util::unlock(lock).await?; } Ok(res) @@ -192,3 +205,34 @@ pub async fn resume_app(name: &str) -> Result<(), Error> { crate::util::unlock(lock).await?; Ok(()) } + +pub async fn repair_app_status() -> Result<(), Error> { + let running: Vec<String> = if let Some(mut f) = PersistencePath::from_ref("running.yaml") + .maybe_read(false) + .await + .transpose()? + { + from_yaml_async_reader(&mut *f).await? + } else { + Vec::new() + }; + for name in running { + let lock = crate::util::lock_file( + format!( + "{}", + Path::new(crate::PERSISTENCE_DIR) + .join("apps") + .join(&name) + .join("control.lock") + .display() + ), + true, + ) + .await?; + if crate::apps::status(&name).await?.status == crate::apps::DockerStatus::Stopped { + start_app(&name, true).await?; + } + crate::util::unlock(lock).await?; + } + Ok(()) +} diff --git a/appmgr/src/main.rs b/appmgr/src/main.rs index 94f7bea73..4a32faa4b 100644 --- a/appmgr/src/main.rs +++ b/appmgr/src/main.rs @@ -816,6 +816,9 @@ async fn inner_main() -> Result<(), Error> { .takes_value(true) .help("Password to use for encryption of backup file"), ), + ) + .subcommand( + SubCommand::with_name("repair-app-status").about("Restarts crashed apps"), // TODO: remove ), ); @@ -1540,6 +1543,10 @@ async fn inner_main() -> Result<(), Error> { std::process::exit(1); } }, + #[cfg(not(feature = "portable"))] + ("repair-app-status", _) => { + control::repair_app_status().await?; + } ("pack", Some(sub_m)) => { pack( sub_m.value_of("PATH").unwrap(),