Files
start-os/sdk/package/lib/mainFn/Daemon.ts
Aiden McClelland 456c5d6725 fix: graceful shutdown for subcontainer daemons
Two issues fixed:

1. Process group cascade: exec-command processes inherited the
   container runtime's process group. When an entrypoint script
   did kill(0, SIGTERM) during shutdown, it signaled ALL processes
   in the group — including other subcontainers' launch wrappers,
   causing their PID namespaces to collapse. Fixed by calling
   setsid() in exec-command's pre_exec to isolate each service
   in its own process group.

2. Unordered daemon termination: removeChild("main") fired
   onLeaveContext callbacks for all Daemon.of() instances
   simultaneously, bypassing Daemons.term()'s reverse-dependency
   ordering. Fixed by having Daemons.build() mark individual
   daemons as managed (suppressing their onLeaveContext) and
   registering a single onLeaveContext that calls the ordered
   Daemons.term(). The term() method is deduplicated so
   system.stop() and onLeaveContext share the same shutdown.
2026-03-21 18:20:50 -06:00

209 lines
6.7 KiB
TypeScript

import * as T from '../../../base/lib/types'
import { asError } from '../../../base/lib/util/asError'
import { logErrorOnce } from '../../../base/lib/util/logErrorOnce'
import { Drop } from '../util'
import { SubContainer, SubContainerRc } from '../util/SubContainer'
import { CommandController } from './CommandController'
import { DaemonCommandType } from './Daemons'
import { Oneshot } from './Oneshot'
/** Milliseconds added to the restart delay after each pass of the daemon loop. */
const TIMEOUT_INCREMENT_MS = 1_000
/** Upper bound, in milliseconds, on the delay between restarts. */
const MAX_TIMEOUT_MS = 30_000
/**
 * A managed long-running process wrapper around {@link CommandController}.
 *
 * Once started, the daemon re-launches its underlying command every time the
 * command exits; a one-shot daemon stops after its first successful exit. The
 * delay between launches grows linearly (0 ms first, then +1 second per pass)
 * and is capped at 30 seconds. When stopped, the running command is terminated
 * through its controller. Extends {@link Drop}; `onDrop` terminates the daemon.
 *
 * @typeParam Manifest - The service manifest type
 * @typeParam C - The subcontainer type, or `null` for JS-only daemons
 */
export class Daemon<
  Manifest extends T.SDKManifest,
  C extends SubContainer<Manifest> | null = SubContainer<Manifest> | null,
> extends Drop {
  /** Controller of the currently running command, or `null` when none is running. */
  private commandController: CommandController<Manifest, C> | null = null
  /** Set when a one-shot daemon's command exits successfully; cleared by {@link term}. */
  protected exitedSuccess = false
  /** Callbacks registered through {@link onExit}; cleared by {@link term}. */
  private onExitFns: ((success: boolean) => void)[] = []
  /** Handle on the active restart loop, or `null` when the daemon is not running. */
  private loop: { abort: AbortController; done: Promise<void> } | null = null
  /** When true, the `onLeaveContext` callback registered by {@link of} does nothing. */
  private _managed = false
  protected constructor(
    private subcontainer: C,
    private startCommand: () => Promise<CommandController<Manifest, C>>,
    readonly oneshot: boolean = false,
  ) {
    super()
  }
  /** Returns true if this daemon is a one-shot process (exits after success) */
  isOneshot(): this is Oneshot<Manifest> {
    return this.oneshot
  }
  /**
   * Factory method to create a new Daemon.
   *
   * Returns a curried function: `(effects, subcontainer, exec) => Daemon`.
   * Registers an `onLeaveContext` callback that terminates the daemon (and
   * destroys its subcontainer) when the effects context is left, unless the
   * daemon has been marked managed via {@link markManaged}.
   */
  static of<Manifest extends T.SDKManifest>() {
    return <C extends SubContainer<Manifest> | null>(
      effects: T.Effects,
      subcontainer: C,
      exec: DaemonCommandType<Manifest, C>,
    ) => {
      // For an owned subcontainer, hold a reference-counted handle instead of
      // the subcontainer itself.
      let subc: SubContainer<Manifest> | null = subcontainer
      if (subcontainer && subcontainer.isOwned()) subc = subcontainer.rc()
      // Every (re)start hands the command controller its own rc handle.
      const startCommand = () =>
        CommandController.of<Manifest, C>()(
          effects,
          (subc?.rc() ?? null) as C,
          exec,
        )
      const res = new Daemon(subc, startCommand)
      effects.onLeaveContext(() => {
        if (!res._managed) {
          res.term({ destroySubcontainer: true }).catch((e) => logErrorOnce(e))
        }
      })
      return res
    }
  }
  /**
   * Start the daemon. If it is already running, this is a no-op.
   *
   * The daemon keeps re-launching its command with increasing backoff
   * until {@link term} is called (or, for a one-shot daemon, until the
   * command exits successfully).
   */
  async start() {
    if (this.loop) {
      return
    }
    const abort = new AbortController()
    const done = this.runLoop(abort.signal)
    this.loop = { abort, done }
  }
  /**
   * Restart loop backing {@link start}.
   *
   * Launches the command, waits for it to exit, notifies the `onExit`
   * callbacks, then sleeps for the current backoff delay before the next
   * launch. Exits when `signal` is aborted or when a one-shot daemon's
   * command succeeds; always clears `this.loop` on the way out.
   *
   * @param signal - Abort signal that stops the loop
   */
  private async runLoop(signal: AbortSignal) {
    let timeoutCounter = 0
    try {
      while (!signal.aborted) {
        // Make sure a controller left over from a previous pass is terminated.
        if (this.commandController) {
          await this.commandController.term({}).catch(logErrorOnce)
          this.commandController = null
        }
        try {
          this.commandController = await this.startCommand()
          // term() may have been called while the command was still starting.
          if (signal.aborted) {
            await this.commandController.term({}).catch(logErrorOnce)
            this.commandController = null
            break
          }
          const success = await this.commandController.wait().then(
            (_) => true,
            (err) => {
              // Errors caused by a requested shutdown are not reported.
              if (!signal.aborted) logErrorOnce(err)
              return false
            },
          )
          this.commandController = null
          if (signal.aborted) break
          for (const fn of this.onExitFns) {
            try {
              fn(success)
            } catch (e) {
              console.error('EXIT handler', e)
            }
          }
          if (success && this.oneshot) {
            this.exitedSuccess = true
            break
          }
        } catch (e) {
          if (!signal.aborted) console.error(e)
        }
        if (signal.aborted) break
        // Backoff sleep that wakes up early when the loop is aborted.
        await new Promise<void>((resolve) => {
          const onAbort = () => {
            clearTimeout(timer)
            resolve()
          }
          const timer = setTimeout(() => {
            // `once` only removes the listener if the signal fires. Detach it
            // here so listeners do not pile up on the long-lived signal across
            // restarts.
            signal.removeEventListener('abort', onAbort)
            resolve()
          }, timeoutCounter)
          signal.addEventListener('abort', onAbort, { once: true })
        })
        timeoutCounter += TIMEOUT_INCREMENT_MS
        timeoutCounter = Math.min(MAX_TIMEOUT_MS, timeoutCounter)
      }
    } finally {
      this.loop = null
    }
  }
  /**
   * Terminate the daemon, stopping its underlying command.
   *
   * Aborts the restart loop, terminates the running command (if any) with the
   * given options, waits for the loop to finish, and optionally destroys the
   * subcontainer. Also clears `exitedSuccess` and all `onExit` callbacks.
   *
   * @param termOptions - Optional termination settings, forwarded to the command controller
   * @param termOptions.signal - The signal to send (documented default: SIGTERM)
   * @param termOptions.timeout - Milliseconds to wait before SIGKILL
   * @param termOptions.destroySubcontainer - Whether to destroy the subcontainer after exit
   */
  async term(termOptions?: {
    signal?: NodeJS.Signals | undefined
    timeout?: number | undefined
    destroySubcontainer?: boolean
  }) {
    this.exitedSuccess = false
    this.onExitFns = []
    if (this.loop) {
      this.loop.abort.abort()
    }
    // Detach the controller before awaiting so nothing else terminates it twice.
    const exiting = this.commandController?.term({ ...termOptions })
    this.commandController = null
    if (exiting) await exiting.catch(logErrorOnce)
    if (this.loop) {
      await this.loop.done
    }
    if (termOptions?.destroySubcontainer) {
      await this.subcontainer?.destroy()
    }
  }
  /**
   * Mark this daemon as managed by a {@link Daemons} instance.
   * Suppresses the individual `onLeaveContext` termination since the
   * `Daemons` instance handles ordered shutdown.
   */
  markManaged() {
    this._managed = true
  }
  /** Get a reference-counted handle to the daemon's subcontainer, or null if there is none */
  subcontainerRc(): SubContainerRc<Manifest> | null {
    return this.subcontainer?.rc() ?? null
  }
  /**
   * Check whether this daemon shares the same subcontainer as another daemon.
   *
   * Compares subcontainer `guid`s; note that two daemons that both have no
   * subcontainer compare equal (`undefined === undefined`).
   */
  sharesSubcontainerWith(
    other: Daemon<Manifest, SubContainer<Manifest> | null>,
  ): boolean {
    return this.subcontainer?.guid === other.subcontainer?.guid
  }
  /**
   * Register a callback to be invoked each time the daemon's process exits.
   * Callbacks are not invoked for exits caused by {@link term}, and `term`
   * discards all registered callbacks.
   * @param fn - Callback receiving `true` on clean exit, `false` on error
   */
  onExit(fn: (success: boolean) => void) {
    this.onExitFns.push(fn)
  }
  /** Drop hook: terminates the daemon (without destroying the subcontainer), logging any failure. */
  onDrop(): void {
    this.term().catch((e) => logErrorOnce(asError(e)))
  }
}