mirror of
https://github.com/Start9Labs/start-os.git
synced 2026-03-30 20:14:49 +00:00
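Adds a flag to the manager shared state indicating whether health check results should be committed; the flag is enabled when the main process starts and cleared on stop and on signal, so that health checks stop committing their results.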
This commit is contained in:
committed by
Aiden McClelland
parent
d8fb32ea8b
commit
e578062082
@@ -505,26 +505,19 @@ pub fn configure<'a, Db: DbHandle>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if let Some(signal) = signal {
|
if let Some(signal) = signal {
|
||||||
ctx.docker
|
match ctx.managers.get(&(id.clone(), version.clone())).await {
|
||||||
.kill_container(
|
None => {
|
||||||
&DockerAction::container_name(id, None),
|
// in theory this should never happen, which indicates this function should be moved behind the
|
||||||
Some(KillContainerOptions {
|
// Manager interface
|
||||||
signal: signal.to_string(),
|
return Err(Error::new(
|
||||||
}),
|
eyre!("Manager Not Found for package being configured"),
|
||||||
)
|
crate::ErrorKind::Incoherent,
|
||||||
.await
|
));
|
||||||
// ignore container is not running https://docs.docker.com/engine/api/v1.41/#operation/ContainerKill
|
}
|
||||||
.or_else(|e| {
|
Some(m) => {
|
||||||
if matches!(
|
m.signal(&signal).await?;
|
||||||
e,
|
}
|
||||||
bollard::errors::Error::DockerResponseConflictError { .. }
|
}
|
||||||
| bollard::errors::Error::DockerResponseNotFoundError { .. }
|
|
||||||
) {
|
|
||||||
Ok(())
|
|
||||||
} else {
|
|
||||||
Err(e)
|
|
||||||
}
|
|
||||||
})?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -60,6 +60,7 @@ pub enum ErrorKind {
|
|||||||
ParseDbField = 52,
|
ParseDbField = 52,
|
||||||
Duplicate = 53,
|
Duplicate = 53,
|
||||||
MultipleErrors = 54,
|
MultipleErrors = 54,
|
||||||
|
Incoherent = 55,
|
||||||
}
|
}
|
||||||
impl ErrorKind {
|
impl ErrorKind {
|
||||||
pub fn as_str(&self) -> &'static str {
|
pub fn as_str(&self) -> &'static str {
|
||||||
@@ -119,6 +120,7 @@ impl ErrorKind {
|
|||||||
ParseDbField => "Database Field Parse Error",
|
ParseDbField => "Database Field Parse Error",
|
||||||
Duplicate => "Duplication Error",
|
Duplicate => "Duplication Error",
|
||||||
MultipleErrors => "Multiple Errors",
|
MultipleErrors => "Multiple Errors",
|
||||||
|
Incoherent => "Incoherent",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
|
use std::sync::atomic::AtomicBool;
|
||||||
|
|
||||||
use patch_db::DbHandle;
|
use patch_db::DbHandle;
|
||||||
use tracing::instrument;
|
use tracing::instrument;
|
||||||
@@ -15,6 +16,7 @@ pub async fn check<Db: DbHandle>(
|
|||||||
ctx: &RpcContext,
|
ctx: &RpcContext,
|
||||||
db: &mut Db,
|
db: &mut Db,
|
||||||
id: &PackageId,
|
id: &PackageId,
|
||||||
|
should_commit: &AtomicBool,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let mut tx = db.begin().await?;
|
let mut tx = db.begin().await?;
|
||||||
|
|
||||||
@@ -41,7 +43,10 @@ pub async fn check<Db: DbHandle>(
|
|||||||
.get_mut(&mut checkpoint)
|
.get_mut(&mut checkpoint)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
status.main.check(&ctx, &mut checkpoint, &*manifest).await?;
|
status
|
||||||
|
.main
|
||||||
|
.check(&ctx, &mut checkpoint, &*manifest, should_commit)
|
||||||
|
.await?;
|
||||||
|
|
||||||
let failed = match &status.main {
|
let failed = match &status.main {
|
||||||
MainStatus::Running { health, .. } => health.clone(),
|
MainStatus::Running { health, .. } => health.clone(),
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::future::Future;
|
use std::future::Future;
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::task::Poll;
|
use std::task::Poll;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use bollard::container::StopContainerOptions;
|
use bollard::container::{KillContainerOptions, StopContainerOptions};
|
||||||
use color_eyre::eyre::eyre;
|
use color_eyre::eyre::eyre;
|
||||||
|
use nix::sys::signal::Signal;
|
||||||
use num_enum::TryFromPrimitive;
|
use num_enum::TryFromPrimitive;
|
||||||
use patch_db::DbHandle;
|
use patch_db::DbHandle;
|
||||||
use sqlx::{Executor, Sqlite};
|
use sqlx::{Executor, Sqlite};
|
||||||
@@ -158,6 +159,7 @@ pub struct ManagerSharedState {
|
|||||||
tor_keys: BTreeMap<InterfaceId, TorSecretKeyV3>,
|
tor_keys: BTreeMap<InterfaceId, TorSecretKeyV3>,
|
||||||
synchronized: Notify,
|
synchronized: Notify,
|
||||||
synchronize_now: Notify,
|
synchronize_now: Notify,
|
||||||
|
commit_health_check_results: AtomicBool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
@@ -255,11 +257,22 @@ async fn run_main(
|
|||||||
.collect::<Result<Vec<_>, Error>>()?,
|
.collect::<Result<Vec<_>, Error>>()?,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
state
|
||||||
|
.commit_health_check_results
|
||||||
|
.store(true, Ordering::SeqCst);
|
||||||
let health = async {
|
let health = async {
|
||||||
loop {
|
loop {
|
||||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||||
let mut db = state.ctx.db.handle();
|
let mut db = state.ctx.db.handle();
|
||||||
if let Err(e) = health::check(&state.ctx, &mut db, &state.manifest.id).await {
|
if let Err(e) = health::check(
|
||||||
|
&state.ctx,
|
||||||
|
&mut db,
|
||||||
|
&state.manifest.id,
|
||||||
|
&state.commit_health_check_results,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
tracing::error!(
|
tracing::error!(
|
||||||
"Failed to run health check for {}: {}",
|
"Failed to run health check for {}: {}",
|
||||||
&state.manifest.id,
|
&state.manifest.id,
|
||||||
@@ -310,6 +323,7 @@ impl Manager {
|
|||||||
tor_keys,
|
tor_keys,
|
||||||
synchronized: Notify::new(),
|
synchronized: Notify::new(),
|
||||||
synchronize_now: Notify::new(),
|
synchronize_now: Notify::new(),
|
||||||
|
commit_health_check_results: AtomicBool::new(true),
|
||||||
});
|
});
|
||||||
let thread_shared = shared.clone();
|
let thread_shared = shared.clone();
|
||||||
let thread = tokio::spawn(async move {
|
let thread = tokio::spawn(async move {
|
||||||
@@ -324,6 +338,37 @@ impl Manager {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn signal(&self, signal: &Signal) -> Result<(), Error> {
|
||||||
|
// stop health checks from committing their results
|
||||||
|
self.shared
|
||||||
|
.commit_health_check_results
|
||||||
|
.store(false, Ordering::SeqCst);
|
||||||
|
|
||||||
|
// send signal to container
|
||||||
|
self.shared
|
||||||
|
.ctx
|
||||||
|
.docker
|
||||||
|
.kill_container(
|
||||||
|
&self.shared.container_name,
|
||||||
|
Some(KillContainerOptions {
|
||||||
|
signal: signal.to_string(),
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.or_else(|e| {
|
||||||
|
if matches!(
|
||||||
|
e,
|
||||||
|
bollard::errors::Error::DockerResponseConflictError { .. }
|
||||||
|
| bollard::errors::Error::DockerResponseNotFoundError { .. }
|
||||||
|
) {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(e)
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip(self))]
|
||||||
async fn exit(&self) -> Result<(), Error> {
|
async fn exit(&self) -> Result<(), Error> {
|
||||||
let _ = self.shared.on_stop.send(OnStop::Exit);
|
let _ = self.shared.on_stop.send(OnStop::Exit);
|
||||||
@@ -436,6 +481,9 @@ async fn manager_thread_loop(mut recv: Receiver<OnStop>, thread_shared: &Arc<Man
|
|||||||
|
|
||||||
#[instrument(skip(shared))]
|
#[instrument(skip(shared))]
|
||||||
async fn stop(shared: &ManagerSharedState) -> Result<(), Error> {
|
async fn stop(shared: &ManagerSharedState) -> Result<(), Error> {
|
||||||
|
shared
|
||||||
|
.commit_health_check_results
|
||||||
|
.store(false, Ordering::SeqCst);
|
||||||
shared.on_stop.send(OnStop::Sleep).map_err(|_| {
|
shared.on_stop.send(OnStop::Sleep).map_err(|_| {
|
||||||
Error::new(
|
Error::new(
|
||||||
eyre!("Manager has already been shutdown"),
|
eyre!("Manager has already been shutdown"),
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use patch_db::{DbHandle, HasModel};
|
use patch_db::{DbHandle, HasModel};
|
||||||
@@ -46,10 +47,11 @@ impl MainStatus {
|
|||||||
ctx: &RpcContext,
|
ctx: &RpcContext,
|
||||||
db: &mut Db,
|
db: &mut Db,
|
||||||
manifest: &Manifest,
|
manifest: &Manifest,
|
||||||
|
should_commit: &AtomicBool,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
match self {
|
match self {
|
||||||
MainStatus::Running { started, health } => {
|
MainStatus::Running { started, health } => {
|
||||||
*health = manifest
|
let health_result = manifest
|
||||||
.health_checks
|
.health_checks
|
||||||
.check_all(
|
.check_all(
|
||||||
ctx,
|
ctx,
|
||||||
@@ -59,6 +61,12 @@ impl MainStatus {
|
|||||||
&manifest.volumes,
|
&manifest.volumes,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
if !should_commit.load(Ordering::SeqCst) {
|
||||||
|
return Ok(());
|
||||||
|
} else {
|
||||||
|
// only commit health check results if we are supposed to
|
||||||
|
*health = health_result;
|
||||||
|
}
|
||||||
let mut should_stop = false;
|
let mut should_stop = false;
|
||||||
for (check, res) in health {
|
for (check, res) in health {
|
||||||
match &res {
|
match &res {
|
||||||
|
|||||||
Reference in New Issue
Block a user