mirror of
https://github.com/Start9Labs/start-os.git
synced 2026-03-26 02:11:53 +00:00
Two issues fixed:
1. Process group cascade: exec-command processes inherited the
container runtime's process group. When an entrypoint script
did kill(0, SIGTERM) during shutdown, it signaled ALL processes
in the group — including other subcontainers' launch wrappers,
causing their PID namespaces to collapse. Fixed by calling
setsid() in exec-command's pre_exec to isolate each service
in its own process group.
2. Unordered daemon termination: removeChild("main") fired
onLeaveContext callbacks for all Daemon.of() instances
simultaneously, bypassing Daemons.term()'s reverse-dependency
ordering. Fixed by having Daemons.build() mark individual
daemons as managed (suppressing their onLeaveContext) and
registering a single onLeaveContext that calls the ordered
Daemons.term(). The term() method is deduplicated so
system.stop() and onLeaveContext share the same shutdown.
209 lines
6.7 KiB
TypeScript
209 lines
6.7 KiB
TypeScript
import * as T from '../../../base/lib/types'
|
|
import { asError } from '../../../base/lib/util/asError'
|
|
import { logErrorOnce } from '../../../base/lib/util/logErrorOnce'
|
|
import { Drop } from '../util'
|
|
import { SubContainer, SubContainerRc } from '../util/SubContainer'
|
|
import { CommandController } from './CommandController'
|
|
import { DaemonCommandType } from './Daemons'
|
|
import { Oneshot } from './Oneshot'
|
|
|
|
/** Milliseconds added to the restart delay after every pass of the daemon's restart loop. */
const TIMEOUT_INCREMENT_MS = 1_000

/** Upper bound, in milliseconds, that the restart delay is clamped to. */
const MAX_TIMEOUT_MS = 30_000
|
|
/**
 * A managed long-running process wrapper around {@link CommandController}.
 *
 * When started, the daemon runs its command in a loop: every time the command
 * exits it is started again, except that a one-shot daemon stops after its
 * first successful run. The delay between restarts starts at 0 ms and grows
 * linearly by `TIMEOUT_INCREMENT_MS` per iteration, capped at `MAX_TIMEOUT_MS`.
 * When stopped via {@link term}, the loop is aborted and the running command is
 * terminated. Extends {@link Drop}; `onDrop` terminates the daemon.
 *
 * @typeParam Manifest - The service manifest type
 * @typeParam C - The subcontainer type, or `null` for JS-only daemons
 */
export class Daemon<
  Manifest extends T.SDKManifest,
  C extends SubContainer<Manifest> | null = SubContainer<Manifest> | null,
> extends Drop {
  /** Controller of the currently running command, or `null` when none is running */
  private commandController: CommandController<Manifest, C> | null = null
  /** Set to `true` once a one-shot daemon's command has exited successfully */
  protected exitedSuccess = false
  /** Callbacks registered through {@link onExit}; cleared by {@link term} */
  private onExitFns: ((success: boolean) => void)[] = []
  /** Handle to the active restart loop, or `null` when the daemon is not started */
  private loop: { abort: AbortController; done: Promise<void> } | null = null
  /** When `true`, the `onLeaveContext` callback registered by {@link of} does nothing */
  private _managed = false

  protected constructor(
    private subcontainer: C,
    private startCommand: () => Promise<CommandController<Manifest, C>>,
    readonly oneshot: boolean = false,
  ) {
    super()
  }

  /** Returns true if this daemon is a one-shot process (exits after success) */
  isOneshot(): this is Oneshot<Manifest> {
    return this.oneshot
  }

  /**
   * Factory method to create a new Daemon.
   *
   * Returns a curried function: `(effects, subcontainer, exec) => Daemon`.
   * Registers an `onLeaveContext` callback that terminates the daemon and
   * destroys its subcontainer when the effects context is left, unless the
   * daemon has been marked as managed via {@link markManaged}.
   */
  static of<Manifest extends T.SDKManifest>() {
    return <C extends SubContainer<Manifest> | null>(
      effects: T.Effects,
      subcontainer: C,
      exec: DaemonCommandType<Manifest, C>,
    ) => {
      let subc: SubContainer<Manifest> | null = subcontainer
      // For an owned subcontainer, hold a reference-counted handle instead.
      if (subcontainer && subcontainer.isOwned()) subc = subcontainer.rc()
      // Every (re)start hands the command controller its own rc handle.
      const startCommand = () =>
        CommandController.of<Manifest, C>()(
          effects,
          (subc?.rc() ?? null) as C,
          exec,
        )
      const res = new Daemon(subc, startCommand)
      effects.onLeaveContext(() => {
        if (!res._managed) {
          res.term({ destroySubcontainer: true }).catch((e) => logErrorOnce(e))
        }
      })
      return res
    }
  }

  /**
   * Start the daemon. If it is already running, this is a no-op.
   *
   * The daemon's command is restarted with increasing backoff each time it
   * exits, until {@link term} is called (or, for a one-shot daemon, until the
   * command succeeds).
   */
  async start() {
    if (this.loop) {
      return
    }
    const abort = new AbortController()
    const done = this.runLoop(abort.signal)
    this.loop = { abort, done }
  }

  /**
   * Restart loop backing {@link start}.
   *
   * Starts the command, waits for it to exit, notifies the `onExit` callbacks,
   * then sleeps for the current backoff delay before starting it again. The
   * loop ends when `signal` is aborted or when a one-shot daemon's command
   * succeeds. `this.loop` is cleared when the loop finishes.
   *
   * @param signal - Abort signal that stops the loop
   */
  private async runLoop(signal: AbortSignal) {
    let timeoutCounter = 0
    try {
      while (!signal.aborted) {
        // Make sure a leftover controller is terminated before starting anew.
        if (this.commandController) {
          await this.commandController.term({}).catch(logErrorOnce)
          this.commandController = null
        }
        try {
          this.commandController = await this.startCommand()
          // Aborted while the command was starting: stop it right away.
          if (signal.aborted) {
            await this.commandController.term({}).catch(logErrorOnce)
            this.commandController = null
            break
          }
          const success = await this.commandController.wait().then(
            (_) => true,
            (err) => {
              // Errors caused by an intentional shutdown are not reported.
              if (!signal.aborted) logErrorOnce(err)
              return false
            },
          )
          this.commandController = null
          if (signal.aborted) break
          for (const fn of this.onExitFns) {
            try {
              fn(success)
            } catch (e) {
              console.error('EXIT handler', e)
            }
          }
          if (success && this.oneshot) {
            this.exitedSuccess = true
            break
          }
        } catch (e) {
          if (!signal.aborted) console.error(e)
        }
        if (signal.aborted) break
        // Sleep for the current backoff delay, waking up early on abort.
        await new Promise<void>((resolve) => {
          const onAbort = () => {
            clearTimeout(timer)
            resolve()
          }
          const timer = setTimeout(() => {
            // Drop the abort listener once the delay has elapsed; otherwise
            // one listener per restart would pile up on the same signal.
            signal.removeEventListener('abort', onAbort)
            resolve()
          }, timeoutCounter)
          signal.addEventListener('abort', onAbort, { once: true })
        })
        // Linear backoff, clamped to the maximum delay.
        timeoutCounter += TIMEOUT_INCREMENT_MS
        timeoutCounter = Math.min(MAX_TIMEOUT_MS, timeoutCounter)
      }
    } finally {
      this.loop = null
    }
  }

  /**
   * Terminate the daemon, stopping its underlying command.
   *
   * Aborts the restart loop, forwards `termOptions` to the running command
   * controller's `term` (if a command is running) and waits for the loop to
   * finish. Also resets the one-shot success flag and discards all callbacks
   * registered through {@link onExit}. Optionally destroys the subcontainer
   * after termination.
   *
   * @param termOptions - Optional termination settings
   * @param termOptions.signal - The signal to send to the command
   * @param termOptions.timeout - Timeout in milliseconds passed to the command controller
   * @param termOptions.destroySubcontainer - Whether to destroy the subcontainer after exit
   */
  async term(termOptions?: {
    signal?: NodeJS.Signals | undefined
    timeout?: number | undefined
    destroySubcontainer?: boolean
  }) {
    this.exitedSuccess = false
    this.onExitFns = []

    if (this.loop) {
      this.loop.abort.abort()
    }

    // Detach the controller before awaiting so it is only terminated once.
    const exiting = this.commandController?.term({ ...termOptions })
    this.commandController = null
    if (exiting) await exiting.catch(logErrorOnce)

    if (this.loop) {
      await this.loop.done
    }

    if (termOptions?.destroySubcontainer) {
      await this.subcontainer?.destroy()
    }
  }

  /**
   * Mark this daemon as managed by a {@link Daemons} instance.
   * Suppresses the individual `onLeaveContext` termination since the
   * `Daemons` instance handles ordered shutdown.
   */
  markManaged() {
    this._managed = true
  }

  /** Get a reference-counted handle to the daemon's subcontainer, or null if there is none */
  subcontainerRc(): SubContainerRc<Manifest> | null {
    return this.subcontainer?.rc() ?? null
  }

  /**
   * Check whether this daemon shares the same subcontainer as another daemon.
   * Subcontainers are compared by `guid`; two daemons without a subcontainer
   * also compare as equal.
   */
  sharesSubcontainerWith(
    other: Daemon<Manifest, SubContainer<Manifest> | null>,
  ): boolean {
    return this.subcontainer?.guid === other.subcontainer?.guid
  }

  /**
   * Register a callback to be invoked each time the daemon's process exits.
   * Callbacks are not invoked for exits caused by {@link term}, and are
   * discarded when {@link term} is called.
   * @param fn - Callback receiving `true` on clean exit, `false` on error
   */
  onExit(fn: (success: boolean) => void) {
    this.onExitFns.push(fn)
  }

  /** Terminates the daemon when it is dropped; failures are logged, not thrown */
  onDrop(): void {
    this.term().catch((e) => logErrorOnce(asError(e)))
  }
}
|