Files
start-os/sdk/package/lib/mainFn/Daemon.ts
Aiden McClelland 9ff65497a8 fix: replace fire-and-forget restart loop in Daemon with tracked AbortController
- Track the restart loop as an awaitable { abort, done } handle
- Remove shouldBeRunning flag — signal.aborted serves the same purpose
- Remove exiting field — term() awaits command termination inline
- Guard start() on loop existence to prevent concurrent restart loops
- Make backoff sleep abortable so term() returns immediately
- Suppress error logging during intentional termination
- Loop clears its own handle in finally block for natural exit (oneshot)
2026-03-20 14:31:46 -06:00

197 lines
6.4 KiB
TypeScript

import * as T from '../../../base/lib/types'
import { asError } from '../../../base/lib/util/asError'
import { logErrorOnce } from '../../../base/lib/util/logErrorOnce'
import { Drop } from '../util'
import { SubContainer, SubContainerRc } from '../util/SubContainer'
import { CommandController } from './CommandController'
import { DaemonCommandType } from './Daemons'
import { Oneshot } from './Oneshot'
/** Amount, in milliseconds, added to the restart delay after every pass of the restart loop */
const TIMEOUT_INCREMENT_MS = 1_000
/** Upper bound, in milliseconds, for the delay between two restart attempts */
const MAX_TIMEOUT_MS = 30_000
/**
* A managed long-running process wrapper around {@link CommandController}.
*
* When started, the daemon automatically restarts its underlying command whenever it exits
* or fails to start, waiting between attempts with a delay that grows by one second per
* restart (capped at 30 seconds). When stopped, the command is terminated
* gracefully. Implements {@link Drop} for automatic cleanup when the context is left.
*
* @typeParam Manifest - The service manifest type
* @typeParam C - The subcontainer type, or `null` for JS-only daemons
*/
export class Daemon<
  Manifest extends T.SDKManifest,
  C extends SubContainer<Manifest> | null = SubContainer<Manifest> | null,
> extends Drop {
  /** Controller of the command that is currently running, or `null` when there is none */
  private commandController: CommandController<Manifest, C> | null = null
  /** Set to `true` when a oneshot command exits successfully; reset to `false` by {@link term} */
  protected exitedSuccess = false
  /** Callbacks registered through {@link onExit}; emptied by {@link term} */
  private onExitFns: ((success: boolean) => void)[] = []
  /**
   * Handle of the restart loop: `abort` stops it, `done` settles once it has exited.
   * `null` while no loop exists.
   */
  private loop: { abort: AbortController; done: Promise<void> } | null = null

  protected constructor(
    private subcontainer: C,
    private startCommand: () => Promise<CommandController<Manifest, C>>,
    readonly oneshot: boolean = false,
  ) {
    super()
  }

  /** Returns true if this daemon is a one-shot process (exits after success) */
  isOneshot(): this is Oneshot<Manifest> {
    return this.oneshot
  }

  /**
   * Factory method to create a new Daemon.
   *
   * Returns a curried function: `(effects, subcontainer, exec) => Daemon`.
   * The daemon auto-terminates when the effects context is left.
   */
  static of<Manifest extends T.SDKManifest>() {
    return <C extends SubContainer<Manifest> | null>(
      effects: T.Effects,
      subcontainer: C,
      exec: DaemonCommandType<Manifest, C>,
    ) => {
      let subc: SubContainer<Manifest> | null = subcontainer
      // For an owned subcontainer the daemon keeps an rc() handle instead of the original
      if (subcontainer && subcontainer.isOwned()) subc = subcontainer.rc()
      // Every (re)start hands the command a fresh rc() handle of its own
      const startCommand = () =>
        CommandController.of<Manifest, C>()(
          effects,
          (subc?.rc() ?? null) as C,
          exec,
        )
      const res = new Daemon(subc, startCommand)
      effects.onLeaveContext(() => {
        res.term({ destroySubcontainer: true }).catch((e) => logErrorOnce(e))
      })
      return res
    }
  }

  /**
   * Start the daemon. If it is already running, this is a no-op.
   *
   * If a previous restart loop has been aborted by {@link term} but has not finished
   * unwinding yet, this waits for it to finish and then starts a fresh loop, so a
   * `start()` issued right after `term()` is not silently dropped.
   *
   * The daemon will automatically restart on failure with increasing backoff
   * until {@link term} is called.
   */
  async start() {
    // An aborted loop is on its way out and must not count as "already running"
    while (this.loop) {
      if (!this.loop.abort.signal.aborted) return
      await this.loop.done
    }
    const abort = new AbortController()
    const done = this.runLoop(abort.signal)
    this.loop = { abort, done }
  }

  /**
   * Body of the restart loop: start the command, wait for it to exit, notify the
   * exit callbacks, back off, repeat. Exits when `signal` is aborted or when a
   * oneshot command exits successfully.
   *
   * @param signal - Abort signal of the loop handle created by {@link start}
   */
  private async runLoop(signal: AbortSignal) {
    let timeoutCounter = 0
    try {
      while (!signal.aborted) {
        // Make sure a leftover command is gone before starting a new one
        if (this.commandController) {
          await this.commandController.term({}).catch(logErrorOnce)
          this.commandController = null
        }
        try {
          this.commandController = await this.startCommand()
          // term() may have been called while the command was still starting
          if (signal.aborted) {
            await this.commandController.term({}).catch(logErrorOnce)
            this.commandController = null
            break
          }
          const success = await this.commandController.wait().then(
            (_) => true,
            (err) => {
              // Errors caused by an intentional termination are not worth logging
              if (!signal.aborted) logErrorOnce(err)
              return false
            },
          )
          this.commandController = null
          // Exit callbacks are skipped when the exit was requested through term()
          if (signal.aborted) break
          for (const fn of this.onExitFns) {
            try {
              fn(success)
            } catch (e) {
              console.error('EXIT handler', e)
            }
          }
          if (success && this.oneshot) {
            this.exitedSuccess = true
            break
          }
        } catch (e) {
          if (!signal.aborted) console.error(e)
        }
        if (signal.aborted) break
        await Daemon.abortableSleep(timeoutCounter, signal)
        timeoutCounter += TIMEOUT_INCREMENT_MS
        timeoutCounter = Math.min(MAX_TIMEOUT_MS, timeoutCounter)
      }
    } finally {
      // Clear the handle only if it still belongs to this loop
      if (this.loop?.abort.signal === signal) this.loop = null
    }
  }

  /**
   * Resolve after `ms` milliseconds, or as soon as `signal` is aborted, whichever
   * comes first. The abort listener is removed again when the timer fires, so
   * repeated sleeps do not pile up listeners on the same signal.
   */
  private static abortableSleep(
    ms: number,
    signal: AbortSignal,
  ): Promise<void> {
    return new Promise<void>((resolve) => {
      const onAbort = () => {
        clearTimeout(timer)
        resolve()
      }
      const timer = setTimeout(() => {
        signal.removeEventListener('abort', onAbort)
        resolve()
      }, ms)
      signal.addEventListener('abort', onAbort, { once: true })
    })
  }

  /**
   * Terminate the daemon, stopping its underlying command.
   *
   * Aborts the restart loop, terminates the running command with the given options
   * and waits for the loop to finish. Also resets `exitedSuccess` and drops all
   * callbacks registered through {@link onExit}.
   * Optionally destroys the subcontainer after termination.
   *
   * @param termOptions - Optional termination settings
   * @param termOptions.signal - The signal to send (default: SIGTERM)
   * @param termOptions.timeout - Milliseconds to wait before SIGKILL
   * @param termOptions.destroySubcontainer - Whether to destroy the subcontainer after exit
   */
  async term(termOptions?: {
    signal?: NodeJS.Signals | undefined
    timeout?: number | undefined
    destroySubcontainer?: boolean
  }) {
    this.exitedSuccess = false
    this.onExitFns = []
    if (this.loop) {
      this.loop.abort.abort()
    }
    const exiting = this.commandController?.term({ ...termOptions })
    this.commandController = null
    if (exiting) await exiting.catch(logErrorOnce)
    // The loop clears its own handle on exit, so it may already be gone here
    if (this.loop) {
      await this.loop.done
    }
    if (termOptions?.destroySubcontainer) {
      await this.subcontainer?.destroy()
    }
  }

  /** Get a reference-counted handle to the daemon's subcontainer, or null if there is none */
  subcontainerRc(): SubContainerRc<Manifest> | null {
    return this.subcontainer?.rc() ?? null
  }

  /** Check whether this daemon shares the same subcontainer as another daemon */
  sharesSubcontainerWith(
    other: Daemon<Manifest, SubContainer<Manifest> | null>,
  ): boolean {
    return this.subcontainer?.guid === other.subcontainer?.guid
  }

  /**
   * Register a callback to be invoked when the daemon's command exits on its own.
   *
   * Callbacks are not invoked for exits caused by {@link term}, nor when the command
   * fails to start, and all registered callbacks are dropped by {@link term}.
   *
   * @param fn - Callback receiving `true` on clean exit, `false` on error
   */
  onExit(fn: (success: boolean) => void) {
    this.onExitFns.push(fn)
  }

  /** Terminates the daemon when it is dropped; termination errors are logged, not thrown */
  onDrop(): void {
    this.term().catch((e) => logErrorOnce(asError(e)))
  }
}