mute crash notifications when service is just started (#1245)

* mute crash notifications when service is just started

* mute first service crash if within 15s of start
This commit is contained in:
Aiden McClelland
2022-02-21 16:53:06 -07:00
committed by GitHub
parent 272704794c
commit 42e1a02a61
2 changed files with 31 additions and 11 deletions

View File

@@ -7,6 +7,7 @@ use std::sync::Arc;
use std::time::Duration;
use bollard::Docker;
use chrono::Utc;
use color_eyre::eyre::eyre;
use patch_db::json_ptr::JsonPointer;
use patch_db::{DbHandle, LockType, PatchDb, Revision};
@@ -287,13 +288,14 @@ impl RpcContext {
main,
MainStatus::Stopped, /* placeholder */
) {
MainStatus::BackingUp { started, health } => {
if let Some(started) = started {
MainStatus::Running { started, health }
MainStatus::BackingUp { started, .. } => {
if let Some(_) = started {
MainStatus::Starting
} else {
MainStatus::Stopped
}
}
MainStatus::Running { .. } => MainStatus::Starting,
a => a,
};
*main = new_main;

View File

@@ -7,6 +7,7 @@ use std::task::Poll;
use std::time::Duration;
use bollard::container::{KillContainerOptions, StopContainerOptions};
use chrono::Utc;
use color_eyre::eyre::eyre;
use nix::sys::signal::Signal;
use num_enum::TryFromPrimitive;
@@ -26,6 +27,7 @@ use crate::net::interface::InterfaceId;
use crate::net::GeneratedCertificateMountPoint;
use crate::notifications::NotificationLevel;
use crate::s9pk::manifest::{Manifest, PackageId};
use crate::status::MainStatus;
use crate::util::{Container, NonDetachingJoinHandle, Version};
use crate::Error;
@@ -476,23 +478,39 @@ async fn manager_thread_loop(mut recv: Receiver<OnStop>, thread_shared: &Arc<Man
match run_main(&thread_shared).await {
Ok(Ok(NoOutput)) => (), // restart
Ok(Err(e)) => {
let res = thread_shared.ctx.notification_manager
let mut db = thread_shared.ctx.db.handle();
let started = crate::db::DatabaseModel::new()
.package_data()
.idx_model(&thread_shared.manifest.id)
.and_then(|pde| pde.installed())
.map::<_, MainStatus>(|i| i.status().main())
.get(&mut db, false)
.await;
match started.as_deref() {
Ok(Some(MainStatus::Running { started, .. }))
if Utc::now().signed_duration_since(*started)
> chrono::Duration::from_std(Duration::from_secs(15)).unwrap() =>
{
let res = thread_shared.ctx.notification_manager
.notify(
&mut thread_shared.ctx.db.handle(),
&mut db,
Some(thread_shared.manifest.id.clone()),
NotificationLevel::Warning,
String::from("Service Crashed"),
format!("The service {} has crashed with the following exit code: {}\nDetails: {}", thread_shared.manifest.id.clone(), e.0, e.1),
(),
Some(900) // 15 minutes
Some(3600) // 1 hour
)
.await;
match res {
Err(e) => {
tracing::error!("Failed to issue notification: {}", e);
tracing::debug!("{:?}", e);
match res {
Err(e) => {
tracing::error!("Failed to issue notification: {}", e);
tracing::debug!("{:?}", e);
}
Ok(()) => {}
}
}
Ok(()) => {}
_ => tracing::error!("service just started. not issuing crash notification"),
}
tracing::error!("service crashed: {}: {}", e.0, e.1);
tokio::time::sleep(Duration::from_secs(15)).await;