fix: graceful shutdown for subcontainer daemons

Two issues fixed:

1. Process group cascade: exec-command processes inherited the
   container runtime's process group. When an entrypoint script
   did kill(0, SIGTERM) during shutdown, it signaled ALL processes
   in the group — including other subcontainers' launch wrappers,
   causing their PID namespaces to collapse. Fixed by calling
   setsid() in exec-command's pre_exec to isolate each service
   in its own process group.

2. Unordered daemon termination: removeChild("main") fired
   onLeaveContext callbacks for all Daemon.of() instances
   simultaneously, bypassing Daemons.term()'s reverse-dependency
   ordering. Fixed by having Daemons.build() mark individual
   daemons as managed (suppressing their onLeaveContext) and
   registering a single onLeaveContext that calls the ordered
   Daemons.term(). The term() method is deduplicated so
   system.stop() and onLeaveContext share the same shutdown.
This commit is contained in:
Aiden McClelland
2026-03-21 18:19:41 -06:00
parent bdfa918a33
commit 456c5d6725
3 changed files with 30 additions and 2 deletions

View File

@@ -283,6 +283,10 @@ impl ExecParams {
let set_gid = gid.ok();
unsafe {
cmd.pre_exec(move || {
// Create a new process group so entrypoint scripts that do
// kill(0, SIGTERM) don't cascade to other subcontainers.
nix::unistd::setsid()
.map_err(|e| std::io::Error::from_raw_os_error(e as i32))?;
if !groups.is_empty() {
nix::unistd::setgroups(&groups)
.map_err(|e| std::io::Error::from_raw_os_error(e as i32))?;

View File

@@ -27,6 +27,7 @@ export class Daemon<
protected exitedSuccess = false
private onExitFns: ((success: boolean) => void)[] = []
private loop: { abort: AbortController; done: Promise<void> } | null = null
private _managed = false
protected constructor(
private subcontainer: C,
private startCommand: () => Promise<CommandController<Manifest, C>>,
@@ -42,7 +43,8 @@ export class Daemon<
* Factory method to create a new Daemon.
*
* Returns a curried function: `(effects, subcontainer, exec) => Daemon`.
* The daemon auto-terminates when the effects context is left.
* Registers an `onLeaveContext` callback that terminates the daemon when the
* effects context is left.
*/
static of<Manifest extends T.SDKManifest>() {
return <C extends SubContainer<Manifest> | null>(
@@ -60,7 +62,9 @@ export class Daemon<
)
const res = new Daemon(subc, startCommand)
effects.onLeaveContext(() => {
if (!res._managed) {
res.term({ destroySubcontainer: true }).catch((e) => logErrorOnce(e))
}
})
return res
}
@@ -173,6 +177,14 @@ export class Daemon<
await this.subcontainer?.destroy()
}
}
/**
* Mark this daemon as managed by a {@link Daemons} instance.
* Suppresses the individual `onLeaveContext` termination since the
* `Daemons` instance handles ordered shutdown.
*/
markManaged() {
this._managed = true
}
/** Get a reference-counted handle to the daemon's subcontainer, or null if there is none */
subcontainerRc(): SubContainerRc<Manifest> | null {
return this.subcontainer?.rc() ?? null

View File

@@ -176,6 +176,7 @@ Daemons.of({
export class Daemons<Manifest extends T.SDKManifest, Ids extends string>
implements T.DaemonBuildable
{
private termPromise: Promise<void> | null = null
private constructor(
readonly effects: T.Effects,
readonly ids: Ids[],
@@ -413,6 +414,13 @@ export class Daemons<Manifest extends T.SDKManifest, Ids extends string>
* if a dependency cycle is detected.
*/
async term() {
if (!this.termPromise) {
this.termPromise = this._term()
}
return this.termPromise
}
private async _term() {
const remaining = new Set(this.healthDaemons)
while (remaining.size > 0) {
@@ -459,7 +467,11 @@ export class Daemons<Manifest extends T.SDKManifest, Ids extends string>
* @returns This `Daemons` instance, now running
*/
async build() {
this.effects.onLeaveContext(() => {
this.term().catch((e) => console.error(e))
})
for (const daemon of this.healthDaemons) {
daemon.daemon?.markManaged()
await daemon.updateStatus()
}
return this