import * as fs from 'fs/promises'
import * as T from '../../../base/lib/types'
import * as cp from 'child_process'
import { promisify } from 'util'
import { Buffer } from 'node:buffer'
import { once } from '../../../base/lib/util/once'
import { Drop } from '../../../base/lib/util/Drop'
import { Mounts } from '../mainFn/Mounts'
import { BackupEffects } from '../backup/Backups'
import { PathBase } from './Volume'

/** Promise-returning wrapper around `child_process.execFile`. */
export const execFile = promisify(cp.execFile)

const False = () => false

/** Raw outcome of a finished command: how it exited and what it printed. */
type ExecResults = {
  exitCode: number | null
  exitSignal: NodeJS.Signals | null
  stdout: string | Buffer
  stderr: string | Buffer
}

export type ExecOptions = {
  /** Data written to the command's stdin; stdin is closed afterwards. */
  input?: string | Buffer
}

/** Number of 1ms polls for `<rootfs>/proc/1` before giving up on the leader. */
const TIMES_TO_WAIT_FOR_PROC = 100

/** How long `killLeader` waits after SIGTERM before escalating to SIGKILL. */
const LEADER_KILL_GRACE_MS = 30000

/** Strips the last path segment (and an optional trailing slash) from `path`. */
function parentDir(path: string): string {
  return path.replace(/\/[^\/]*\/?$/, '')
}

/** Normalises an optional subpath to a string that always starts with `/`. */
function rootedSubpath(subpath: string | null): string {
  if (!subpath) return '/'
  return subpath.startsWith('/') ? subpath : `/${subpath}`
}

/**
 * Returns a shallow copy of `options` without `user` and `cwd`.
 *
 * Those two are translated into `--user` / `--workdir` arguments and must not
 * reach `child_process.spawn`. Copying (instead of `delete`) leaves the
 * caller's object untouched.
 */
function withoutOverrides<O extends CommandOptions>(
  options: O | undefined,
): Omit<O, 'user' | 'cwd'> {
  const { user: _user, cwd: _cwd, ...rest } = options ?? ({} as O)
  return rest
}

/**
 * Makes sure both ends of a bind mount exist with the right kind
 * (regular file vs. directory) before `mount --bind` is invoked.
 *
 * @param from source path, or null when only the target must be prepared
 * @param to mount target
 * @param type 'file', 'directory', or 'infer' (file only if `from` already is a file)
 */
async function prepBind(
  from: string | null,
  to: string,
  type: 'file' | 'directory' | 'infer',
) {
  const fromMeta = from ? await fs.stat(from).catch(() => null) : null
  const toMeta = await fs.stat(to).catch(() => null)
  // Tracks whether `to` still exists after we possibly remove a wrong-typed
  // entry below; `toMeta` itself goes stale once that happens.
  let toExists = toMeta !== null

  if (type === 'file' || (type === 'infer' && from && fromMeta?.isFile())) {
    if (toMeta?.isDirectory()) {
      // Non-recursive on purpose: refuse to discard a populated directory.
      await fs.rmdir(to, { recursive: false })
      toExists = false
    }
    if (from && !fromMeta) {
      await fs.mkdir(parentDir(from), { recursive: true })
      await fs.writeFile(from, '')
    }
    if (!toExists) {
      await fs.mkdir(parentDir(to), { recursive: true })
      await fs.writeFile(to, '')
    }
  } else {
    if (toMeta?.isFile() && !toMeta.size) {
      // Only an empty placeholder file is replaced by a directory.
      await fs.rm(to)
      toExists = false
    }
    if (from && !fromMeta) await fs.mkdir(from, { recursive: true })
    if (!toExists) await fs.mkdir(to, { recursive: true })
  }
}

/**
 * Bind-mounts `from` onto `to`, creating either end as needed and applying
 * the given id mappings through the `X-mount.idmap` mount option.
 */
async function bind(
  from: string,
  to: string,
  type: 'file' | 'directory' | 'infer',
  idmap: IdMap[],
) {
  await prepBind(from, to, type)

  const args = ['--bind']
  if (idmap.length) {
    const entries = idmap.map((i) => `b:${i.fromId}:${i.toId}:${i.range}`)
    args.push(`-oX-mount.idmap=${entries.join(' ')}`)
  }

  await execFile('mount', [...args, from, to])
}

/**
 * Interface representing an isolated container environment for running service processes.
 *
 * Provides methods for executing commands, spawning processes, mounting filesystems,
 * and writing files within the container's rootfs. Comes in two flavors:
 * {@link SubContainerOwned} (owns the underlying filesystem) and
 * {@link SubContainerRc} (reference-counted handle to a shared container).
 */
export interface SubContainer<
  Manifest extends T.SDKManifest,
  Effects extends T.Effects = T.Effects,
> extends Drop,
    PathBase {
  readonly imageId: keyof Manifest['images'] & T.ImageId
  readonly rootfs: string
  readonly guid: T.Guid

  /**
   * Get the absolute path to a file or directory within this subcontainer's rootfs
   * @param path Path relative to the rootfs
   */
  subpath(path: string): string

  /**
   * Apply filesystem mounts (volumes, assets, dependencies, backups) to this subcontainer.
   * @param mounts - The Mounts configuration to apply
   * @returns This subcontainer instance for chaining
   */
  mount(
    mounts: Effects extends BackupEffects
      ? Mounts<
          Manifest,
          {
            subpath: string | null
            mountpoint: string
          }
        >
      : Mounts<Manifest, never>,
  ): Promise<this>

  /** Destroy this subcontainer and clean up its filesystem */
  destroy: () => Promise<null>

  /**
   * @description run a command inside this subcontainer
   * DOES NOT THROW ON NONZERO EXIT CODE (see execFail)
   * @param command an array representing the command and args to execute
   * @param options environment / cwd / user overrides and optional stdin input
   * @param timeoutMs how long to wait before killing the command in ms
   * @param abort aborting this controller kills the command with SIGKILL
   * @returns the exit status and captured output, plus a `throw()` helper
   * that raises {@link ExitError} if the command did not exit cleanly
   */
  exec(
    command: string[],
    options?: CommandOptions & ExecOptions,
    timeoutMs?: number | null,
    abort?: AbortController,
  ): Promise<{
    throw: () => { stdout: string | Buffer; stderr: string | Buffer }
    exitCode: number | null
    exitSignal: NodeJS.Signals | null
    stdout: string | Buffer
    stderr: string | Buffer
  }>

  /**
   * @description run a command inside this subcontainer, throwing on non-zero exit status
   * @param command an array representing the command and args to execute
   * @param options environment / cwd / user overrides and optional stdin input
   * @param timeoutMs how long to wait before killing the command in ms
   * @param abort aborting this controller kills the command with SIGKILL
   * @returns the captured stdout and stderr
   */
  execFail(
    command: string[],
    options?: CommandOptions & ExecOptions,
    timeoutMs?: number | null,
    abort?: AbortController,
  ): Promise<{
    stdout: string | Buffer
    stderr: string | Buffer
  }>

  /**
   * Launch a command as the init (PID 1) process of the subcontainer.
   * Replaces the current leader process.
   * @param command - The command and arguments to execute
   * @param options - Optional environment, working directory, and user overrides
   */
  launch(
    command: string[],
    options?: CommandOptions,
  ): Promise<cp.ChildProcessWithoutNullStreams>

  /**
   * Spawn a command inside the subcontainer as a non-init process.
   * @param command - The command and arguments to execute
   * @param options - Optional environment, working directory, user, and stdio overrides
   */
  spawn(
    command: string[],
    options?: CommandOptions & StdioOptions,
  ): Promise<cp.ChildProcess>

  /**
   * @description Write a file to the subcontainer's filesystem
   * @param path Path relative to the subcontainer rootfs (e.g. "/etc/config.json")
   * @param data The data to write
   * @param options Optional write options (same as node:fs/promises writeFile)
   */
  writeFile(
    path: string,
    data:
      | string
      | NodeJS.ArrayBufferView
      | Iterable<string | NodeJS.ArrayBufferView>
      | AsyncIterable<string | NodeJS.ArrayBufferView>,
    options?: Parameters<typeof fs.writeFile>[2],
  ): Promise<void>

  /**
   * Create a reference-counted handle to this subcontainer.
   * The underlying container is only destroyed when all handles are released.
   */
  rc(): SubContainerRc<Manifest, Effects>

  /** Returns true if this is an owned subcontainer (not a reference-counted handle) */
  isOwned(): this is SubContainerOwned<Manifest, Effects>
}

/**
 * A subcontainer that owns its root filesystem and its leader process.
 *
 * Construction (via {@link SubContainerOwned.of}) creates the filesystem for a
 * specific image, starts a `start-container subcontainer launch` leader,
 * applies the requested mounts and shares selected host directories.
 * `destroy()` kills the leader and removes the filesystem again.
 */
export class SubContainerOwned<
    Manifest extends T.SDKManifest,
    Effects extends T.Effects = T.Effects,
  >
  extends Drop
  implements SubContainer<Manifest, Effects>
{
  private destroyed = false
  /** In-flight teardown, shared by concurrent `destroy()` callers. */
  private destroying: Promise<null> | null = null
  /** Number of live {@link SubContainerRc} handles pointing at this container. */
  public rcs = 0

  private leader: cp.ChildProcess
  private leaderExited: boolean = false
  /** Resolves once `<rootfs>/proc/1` is visible; memoised via `once`. */
  private waitProc: () => Promise<null>

  private constructor(
    readonly effects: Effects,
    readonly imageId: keyof Manifest['images'] & T.ImageId,
    readonly rootfs: string,
    readonly guid: T.Guid,
  ) {
    super()
    this.leaderExited = false
    this.leader = cp.spawn(
      'start-container',
      ['subcontainer', 'launch', rootfs],
      {
        killSignal: 'SIGKILL',
        stdio: 'inherit',
      },
    )
    this.leader.on('exit', () => {
      this.leaderExited = true
    })
    this.waitProc = once(async (): Promise<null> => {
      let count = 0
      while (
        !(await fs.stat(`${this.rootfs}/proc/1`).then((x) => !!x, False))
      ) {
        if (count++ > TIMES_TO_WAIT_FOR_PROC) {
          console.debug('Failed to start subcontainer', {
            guid: this.guid,
            imageId: this.imageId,
            rootfs: this.rootfs,
          })
          throw new Error(`Failed to start subcontainer ${this.imageId}`)
        }
        await wait(1)
      }
      return null
    })
  }

  /**
   * Create a subcontainer for `image`, apply `mounts`, copy the host's
   * `/etc/resolv.conf` and recursively bind `/dev`, `/sys` (and `/run` when
   * `sharedRun` is set) into it. The container is destroyed again if any of
   * these steps fails.
   */
  static async of<Manifest extends T.SDKManifest, Effects extends T.Effects>(
    effects: Effects,
    image: {
      imageId: keyof Manifest['images'] & T.ImageId
      sharedRun?: boolean
    },
    mounts:
      | (Effects extends BackupEffects
          ? Mounts<
              Manifest,
              {
                subpath: string | null
                mountpoint: string
              }
            >
          : Mounts<Manifest, never>)
      | null,
    name: string,
  ): Promise<SubContainerOwned<Manifest, Effects>> {
    const { imageId, sharedRun } = image
    const [rootfs, guid] = await effects.subcontainer.createFs({
      imageId,
      name,
    })

    const res = new SubContainerOwned<Manifest, Effects>(
      effects,
      imageId,
      rootfs,
      guid,
    )

    try {
      if (mounts) {
        await res.mount(mounts)
      }
      const shared = ['dev', 'sys']
      if (sharedRun) {
        shared.push('run')
      }

      await fs.mkdir(`${rootfs}/etc`, { recursive: true })
      await fs.copyFile('/etc/resolv.conf', `${rootfs}/etc/resolv.conf`)

      for (const dirPart of shared) {
        const from = `/${dirPart}`
        const to = `${rootfs}/${dirPart}`
        await fs.mkdir(from, { recursive: true })
        await fs.mkdir(to, { recursive: true })
        await execFile('mount', ['--rbind', from, to])
      }

      return res
    } catch (e) {
      await res.destroy()
      throw e
    }
  }

  /**
   * Run `fn` against a freshly created subcontainer and destroy the
   * subcontainer afterwards, whether `fn` resolves or rejects.
   */
  static async withTemp<
    Manifest extends T.SDKManifest,
    T,
    Effects extends T.Effects,
  >(
    effects: Effects,
    image: {
      imageId: keyof Manifest['images'] & T.ImageId
      sharedRun?: boolean
    },
    mounts:
      | (Effects extends BackupEffects
          ? Mounts<
              Manifest,
              {
                subpath: string | null
                mountpoint: string
              }
            >
          : Mounts<Manifest, never>)
      | null,
    name: string,
    fn: (subContainer: SubContainer<Manifest, Effects>) => Promise<T>,
  ): Promise<T> {
    const subContainer = await SubContainerOwned.of(
      effects,
      image,
      mounts,
      name,
    )
    try {
      return await fn(subContainer)
    } finally {
      await subContainer.destroy()
    }
  }

  /** Absolute host path of `path` inside this subcontainer's rootfs. */
  subpath(path: string): string {
    return path.startsWith('/')
      ? `${this.rootfs}${path}`
      : `${this.rootfs}/${path}`
  }

  /**
   * Apply every mount produced by `mounts.build()`.
   * Volumes, assets and backups are bind-mounted from `/media/startos/...`;
   * pointer mounts are delegated to `effects.mount`.
   * @throws Error if a mount has an unrecognised `type`
   */
  async mount(
    mounts: Effects extends BackupEffects
      ? Mounts<
          Manifest,
          {
            subpath: string | null
            mountpoint: string
          }
        >
      : Mounts<Manifest, never>,
  ): Promise<this> {
    for (const mount of mounts.build()) {
      const { options, mountpoint } = mount
      const path = this.subpath(mountpoint)
      if (options.type === 'volume') {
        const from = `/media/startos/volumes/${options.volumeId}${rootedSubpath(options.subpath)}`
        await bind(from, path, options.filetype, options.idmap)
      } else if (options.type === 'assets') {
        const from = `/media/startos/assets${rootedSubpath(options.subpath)}`
        await bind(from, path, options.filetype, options.idmap)
      } else if (options.type === 'pointer') {
        await prepBind(null, path, 'directory')
        await this.effects.mount({ location: path, target: options })
      } else if (options.type === 'backup') {
        const from = `/media/startos/backup${rootedSubpath(options.subpath)}`
        await bind(from, path, options.filetype, options.idmap)
      } else {
        throw new Error(`unknown type ${(options as any).type}`)
      }
    }
    return this
  }

  /**
   * Stop the current leader: SIGTERM first, SIGKILL if it has not exited
   * after {@link LEADER_KILL_GRACE_MS}. Resolves when the leader has exited.
   * @throws Error('kill(2) failed') if SIGTERM could not be delivered
   */
  private async killLeader(): Promise<void> {
    if (this.leaderExited) {
      return
    }
    // Capture the process so the delayed SIGKILL can never hit a successor.
    const leader = this.leader
    return new Promise<void>((resolve, reject) => {
      let forceKill: NodeJS.Timeout | undefined
      const onExit = () => {
        clearTimeout(forceKill)
        resolve()
      }
      try {
        forceKill = setTimeout(
          () => leader.kill('SIGKILL'),
          LEADER_KILL_GRACE_MS,
        )
        leader.once('exit', onExit)
        if (!leader.kill('SIGTERM')) {
          throw new Error('kill(2) failed')
        }
      } catch (e) {
        // Do not leave a pending SIGKILL timer or listener behind on failure.
        clearTimeout(forceKill)
        leader.off('exit', onExit)
        reject(e)
      }
    })
  }

  /**
   * Kill the leader and destroy the container filesystem.
   * Safe to call repeatedly and concurrently: callers that overlap share one
   * teardown, and a failed teardown may be retried by calling again.
   */
  get destroy() {
    return async (): Promise<null> => {
      if (this.destroyed) return null
      if (!this.destroying) {
        this.destroying = (async () => {
          try {
            const guid = this.guid
            await this.killLeader()
            await this.effects.subcontainer.destroyFs({ guid })
            this.destroyed = true
            return null
          } finally {
            this.destroying = null
          }
        })()
      }
      return this.destroying
    }
  }

  onDrop(): void {
    console.log(`Cleaning up dangling subcontainer ${this.guid}`)
    // Nobody awaits a drop, so log failures instead of leaving the promise unhandled.
    this.destroy().catch((e: unknown) =>
      console.error(`Failed to clean up subcontainer ${this.guid}`, e),
    )
  }

  /**
   * Waits for the container to be up, then builds the `start-container`
   * argument list for running `command` inside it.
   * User and workdir come from `options`, else the image metadata, else
   * `root` and `/`. Env entries whose value is undefined are skipped.
   */
  private async containerArgs(
    subcommand: 'exec' | 'launch',
    command: string[],
    options?: CommandOptions,
  ): Promise<string[]> {
    await this.waitProc()
    // A missing or unreadable metadata file is treated as empty metadata.
    const imageMeta: T.ImageMetadata = await fs
      .readFile(`/media/startos/images/${this.imageId}.json`, {
        encoding: 'utf8',
      })
      .catch(() => '{}')
      .then(JSON.parse)

    const user = options?.user || imageMeta.user || 'root'
    const workdir = options?.cwd || imageMeta.workdir || '/'
    const env = Object.entries(options?.env ?? {})
      .filter(([_, v]) => v != undefined)
      .map(([k, v]) => `--env=${k}=${v}`)

    return [
      'subcontainer',
      subcommand,
      `--env-file=/media/startos/images/${this.imageId}.env`,
      `--user=${user}`,
      `--workdir=${workdir}`,
      ...env,
      this.rootfs,
      ...command,
    ]
  }

  /**
   * @description run a command inside this subcontainer
   * DOES NOT THROW ON NONZERO EXIT CODE (see execFail)
   * @param command an array representing the command and args to execute
   * @param options environment / cwd / user overrides and optional stdin input
   * @param timeoutMs how long to wait before killing the command in ms
   * (default 30000; null disables the timeout)
   * @param abort aborting this controller kills the command with SIGKILL
   * @returns the exit status and captured output, plus a `throw()` helper
   * that raises {@link ExitError} if the command did not exit cleanly
   */
  async exec(
    command: string[],
    options?: CommandOptions & ExecOptions,
    timeoutMs: number | null = 30000,
    abort?: AbortController,
  ): Promise<{
    throw: () => { stdout: string | Buffer; stderr: string | Buffer }
    exitCode: number | null
    exitSignal: NodeJS.Signals | null
    stdout: string | Buffer
    stderr: string | Buffer
  }> {
    const args = await this.containerArgs('exec', command, options)
    const child = cp.spawn('start-container', args, withoutOverrides(options))

    const killChild = () => {
      child.kill('SIGKILL')
    }
    if (abort?.signal.aborted) {
      killChild()
    } else {
      abort?.signal.addEventListener('abort', killChild, { once: true })
    }

    const stdout = { data: '' }
    const stderr = { data: '' }
    const appendData = (appendTo: { data: string }) => (chunk: unknown) => {
      if (typeof chunk === 'string' || chunk instanceof Buffer) {
        appendTo.data += chunk.toString()
      } else {
        console.error('received unexpected chunk', chunk)
      }
    }

    // Listeners are attached before stdin is fed: a child that exits (or
    // fails to spawn) while input is still being written must not be missed,
    // and its output must be drained while we write.
    const finished = new Promise<
      ExecResults & {
        throw: () => { stdout: string | Buffer; stderr: string | Buffer }
      }
    >((resolve, reject) => {
      child.on('error', reject)
      let killTimeout: NodeJS.Timeout | undefined
      if (timeoutMs !== null && child.pid) {
        killTimeout = setTimeout(killChild, timeoutMs)
      }
      child.stdout.on('data', appendData(stdout))
      child.stderr.on('data', appendData(stderr))
      child.on('exit', (code, signal) => {
        clearTimeout(killTimeout)
        abort?.signal.removeEventListener('abort', killChild)
        const result: ExecResults = {
          exitCode: code,
          exitSignal: signal,
          stdout: stdout.data,
          stderr: stderr.data,
        }
        resolve({
          throw: () => {
            if (code || signal) {
              throw new ExitError(command[0], result)
            }
            return { stdout: stdout.data, stderr: stderr.data }
          },
          ...result,
        })
      })
    })
    // If feeding stdin fails first, `finished` is never returned; mark it as
    // handled so a later rejection is not reported as unhandled.
    finished.catch(() => undefined)

    if (options?.input) {
      const input = options.input
      await new Promise<void>((resolve, reject) => {
        child.stdin.on('error', reject)
        child.stdin.write(input, (e) => {
          if (e) {
            reject(e)
          } else {
            resolve()
          }
        })
      })
      await new Promise<void>((resolve) => {
        child.stdin.end(resolve)
      })
    }

    return finished
  }

  /**
   * @description run a command inside this subcontainer, throwing on non-zero exit status
   * @param command an array representing the command and args to execute
   * @param options environment / cwd / user overrides and optional stdin input
   * @param timeoutMs how long to wait before killing the command in ms
   * @param abort aborting this controller kills the command with SIGKILL
   * @returns the captured stdout and stderr
   * @throws ExitError if the command exits non-zero or is killed by a signal
   */
  async execFail(
    command: string[],
    options?: CommandOptions & ExecOptions,
    timeoutMs?: number | null,
    abort?: AbortController,
  ): Promise<{
    stdout: string | Buffer
    stderr: string | Buffer
  }> {
    const res = await this.exec(command, options, timeoutMs, abort)
    return res.throw()
  }

  /**
   * Launch a command as the init (PID 1) process of the subcontainer.
   * The current leader is stopped first and replaced by the new process.
   * @param command - The command and arguments to execute
   * @param options - Optional environment, working directory, and user overrides
   */
  async launch(
    command: string[],
    options?: CommandOptions,
  ): Promise<cp.ChildProcessWithoutNullStreams> {
    const args = await this.containerArgs('launch', command, options)
    await this.killLeader()
    this.leaderExited = false
    this.leader = cp.spawn('start-container', args, {
      ...withoutOverrides(options),
      stdio: 'inherit',
    })
    this.leader.on('exit', () => {
      this.leaderExited = true
    })
    return this.leader as cp.ChildProcessWithoutNullStreams
  }

  /**
   * Spawn a command inside the subcontainer as a non-init process.
   * @param command - The command and arguments to execute
   * @param options - Optional environment, working directory, user, and stdio
   * overrides; stdio is inherited when no options are given
   */
  async spawn(
    command: string[],
    options: CommandOptions & StdioOptions = { stdio: 'inherit' },
  ): Promise<cp.ChildProcess> {
    const args = await this.containerArgs('exec', command, options)
    return cp.spawn('start-container', args, withoutOverrides(options))
  }

  /**
   * @description Write a file to the subcontainer's filesystem,
   * creating missing parent directories first
   * @param path Path relative to the subcontainer rootfs (e.g. "/etc/config.json")
   * @param data The data to write
   * @param options Optional write options (same as node:fs/promises writeFile)
   */
  async writeFile(
    path: string,
    data:
      | string
      | NodeJS.ArrayBufferView
      | Iterable<string | NodeJS.ArrayBufferView>
      | AsyncIterable<string | NodeJS.ArrayBufferView>,
    options?: Parameters<typeof fs.writeFile>[2],
  ): Promise<void> {
    const fullPath = this.subpath(path)
    await fs.mkdir(parentDir(fullPath), { recursive: true })
    return fs.writeFile(fullPath, data, options)
  }

  rc(): SubContainerRc<Manifest, Effects> {
    return new SubContainerRc(this)
  }

  isOwned(): this is SubContainerOwned<Manifest, Effects> {
    return true
  }
}

/**
 * A reference-counted handle to a {@link SubContainerOwned}.
 *
 * Multiple `SubContainerRc` instances can share one underlying subcontainer.
 * The subcontainer is destroyed only when the last reference is released via `destroy()`.
 */
export class SubContainerRc<
    Manifest extends T.SDKManifest,
    Effects extends T.Effects = T.Effects,
  >
  extends Drop
  implements SubContainer<Manifest, Effects>
{
  get imageId() {
    return this.subcontainer.imageId
  }
  get rootfs() {
    return this.subcontainer.rootfs
  }
  get guid() {
    return this.subcontainer.guid
  }
  subpath(path: string): string {
    return this.subcontainer.subpath(path)
  }

  private destroyed = false
  private destroying: Promise<null> | null = null

  public constructor(
    private readonly subcontainer: SubContainerOwned<Manifest, Effects>,
  ) {
    subcontainer.rcs++
    super()
  }

  /** Create a new owned subcontainer and return the first handle to it. */
  static async of<Manifest extends T.SDKManifest, Effects extends T.Effects>(
    effects: Effects,
    image: {
      imageId: keyof Manifest['images'] & T.ImageId
      sharedRun?: boolean
    },
    mounts:
      | (Effects extends BackupEffects
          ? Mounts<
              Manifest,
              {
                subpath: string | null
                mountpoint: string
              }
            >
          : Mounts<Manifest, never>)
      | null,
    name: string,
  ) {
    return new SubContainerRc(
      await SubContainerOwned.of<Manifest, Effects>(
        effects,
        image,
        mounts,
        name,
      ),
    )
  }

  /**
   * Run `fn` against a freshly created subcontainer handle and release the
   * handle afterwards, whether `fn` resolves or rejects.
   */
  static async withTemp<
    Manifest extends T.SDKManifest,
    T,
    Effects extends T.Effects,
  >(
    effects: Effects,
    image: {
      imageId: keyof Manifest['images'] & T.ImageId
      sharedRun?: boolean
    },
    mounts:
      | (Effects extends BackupEffects
          ? Mounts<
              Manifest,
              {
                subpath: string | null
                mountpoint: string
              }
            >
          : Mounts<Manifest, never>)
      | null,
    name: string,
    fn: (subContainer: SubContainer<Manifest, Effects>) => Promise<T>,
  ): Promise<T> {
    const subContainer = await SubContainerRc.of<Manifest, Effects>(
      effects,
      image,
      mounts,
      name,
    )
    try {
      return await fn(subContainer)
    } finally {
      await subContainer.destroy()
    }
  }

  async mount(
    mounts: Effects extends BackupEffects
      ? Mounts<
          Manifest,
          {
            subpath: string | null
            mountpoint: string
          }
        >
      : Mounts<Manifest, never>,
  ): Promise<this> {
    await this.subcontainer.mount(mounts)
    return this
  }

  /**
   * Release this handle. The shared subcontainer is destroyed only when the
   * reference count drops to zero; releasing the same handle twice does not
   * decrement the count again.
   */
  get destroy() {
    return async (): Promise<null> => {
      if (!this.destroyed && !this.destroying) {
        const rcs = --this.subcontainer.rcs
        if (rcs <= 0) {
          this.destroying = this.subcontainer.destroy()
          if (rcs < 0) console.error(new Error('UNREACHABLE: rcs < 0').stack)
        }
      }
      if (this.destroying) {
        await this.destroying
      }
      this.destroyed = true
      this.destroying = null
      return null
    }
  }

  onDrop(): void {
    // Nobody awaits a drop, so log failures instead of leaving the promise unhandled.
    this.destroy().catch((e: unknown) =>
      console.error(`Failed to release subcontainer ${this.guid}`, e),
    )
  }

  /**
   * @description run a command inside this subcontainer
   * DOES NOT THROW ON NONZERO EXIT CODE (see execFail)
   * @param command an array representing the command and args to execute
   * @param options environment / cwd / user overrides and optional stdin input
   * @param timeoutMs how long to wait before killing the command in ms
   * @param abort aborting this controller kills the command with SIGKILL
   * @returns the exit status and captured output, plus a `throw()` helper
   */
  async exec(
    command: string[],
    options?: CommandOptions & ExecOptions,
    timeoutMs?: number | null,
    abort?: AbortController,
  ): Promise<{
    throw: () => { stdout: string | Buffer; stderr: string | Buffer }
    exitCode: number | null
    exitSignal: NodeJS.Signals | null
    stdout: string | Buffer
    stderr: string | Buffer
  }> {
    return this.subcontainer.exec(command, options, timeoutMs, abort)
  }

  /**
   * @description run a command inside this subcontainer, throwing on non-zero exit status
   * @param command an array representing the command and args to execute
   * @param options environment / cwd / user overrides and optional stdin input
   * @param timeoutMs how long to wait before killing the command in ms
   * @param abort aborting this controller kills the command with SIGKILL
   * @returns the captured stdout and stderr
   */
  async execFail(
    command: string[],
    options?: CommandOptions & ExecOptions,
    timeoutMs?: number | null,
    abort?: AbortController,
  ): Promise<{
    stdout: string | Buffer
    stderr: string | Buffer
  }> {
    return this.subcontainer.execFail(command, options, timeoutMs, abort)
  }

  async launch(
    command: string[],
    options?: CommandOptions,
  ): Promise<cp.ChildProcessWithoutNullStreams> {
    return this.subcontainer.launch(command, options)
  }

  async spawn(
    command: string[],
    options: CommandOptions & StdioOptions = { stdio: 'inherit' },
  ): Promise<cp.ChildProcess> {
    return this.subcontainer.spawn(command, options)
  }

  /**
   * @description Write a file to the subcontainer's filesystem
   * @param path Path relative to the subcontainer rootfs (e.g. "/etc/config.json")
   * @param data The data to write
   * @param options Optional write options (same as node:fs/promises writeFile)
   */
  async writeFile(
    path: string,
    data:
      | string
      | NodeJS.ArrayBufferView
      | Iterable<string | NodeJS.ArrayBufferView>
      | AsyncIterable<string | NodeJS.ArrayBufferView>,
    options?: Parameters<typeof fs.writeFile>[2],
  ): Promise<void> {
    return this.subcontainer.writeFile(path, data, options)
  }

  rc(): SubContainerRc<Manifest, Effects> {
    return this.subcontainer.rc()
  }

  isOwned(): this is SubContainerOwned<Manifest, Effects> {
    return false
  }
}

export type CommandOptions = {
  /**
   * Environment variables to set for this command
   */
  env?: { [variable in string]?: string }
  /**
   * the working directory to run this command in
   */
  cwd?: string
  /**
   * the user to run this command as
   */
  user?: string
}

export type StdioOptions = {
  stdio?: cp.IOType
}

/** UID/GID mapping for mount id-remapping (see kernel idmappings docs) */
export type IdMap = { fromId: number; toId: number; range: number }

/** Union of all mount option types supported by the subcontainer runtime */
export type MountOptions =
  | MountOptionsVolume
  | MountOptionsAssets
  | MountOptionsPointer
  | MountOptionsBackup

/** Mount options for binding a service volume into a subcontainer */
export type MountOptionsVolume = {
  type: 'volume'
  volumeId: string
  subpath: string | null
  readonly: boolean
  filetype: 'file' | 'directory' | 'infer'
  idmap: IdMap[]
}

/** Mount options for binding packaged static assets into a subcontainer */
export type MountOptionsAssets = {
  type: 'assets'
  subpath: string | null
  filetype: 'file' | 'directory' | 'infer'
  idmap: IdMap[]
}

/** Mount options for binding a dependency package's volume into a subcontainer */
export type MountOptionsPointer = {
  type: 'pointer'
  packageId: string
  volumeId: string
  subpath: string | null
  readonly: boolean
  idmap: IdMap[]
}

/** Mount options for binding the backup directory into a subcontainer */
export type MountOptionsBackup = {
  type: 'backup'
  subpath: string | null
  filetype: 'file' | 'directory' | 'infer'
  idmap: IdMap[]
}

/** Resolves after `time` milliseconds. */
function wait(time: number) {
  return new Promise((resolve) => setTimeout(resolve, time))
}

/**
 * Error thrown when a subcontainer command exits with a non-zero code or signal.
 * Contains the full result including stdout, stderr, exit code, and exit signal.
 */
export class ExitError extends Error {
  constructor(
    readonly command: string,
    readonly result: {
      exitCode: number | null
      exitSignal: T.Signals | null
      stdout: string | Buffer
      stderr: string | Buffer
    },
  ) {
    let message: string
    if (result.exitCode) {
      message = `${command} failed with exit code ${result.exitCode}: ${result.stderr}`
    } else if (result.exitSignal) {
      message = `${command} terminated with signal ${result.exitSignal}: ${result.stderr}`
    } else {
      message = `${command} succeeded: ${result.stdout}`
    }
    super(message)
    // Identify the error class in logs and stack traces.
    this.name = 'ExitError'
  }
}