mirror of
https://github.com/Start9Labs/start-os.git
synced 2026-03-26 02:11:53 +00:00
fix: replace stdio chown with prctl(PR_SET_DUMPABLE) and pipe-wrap
After setuid, the kernel clears the dumpable flag, making /proc/self/
entries owned by root. This broke open("/dev/stderr") for non-root
users inside subcontainers. The previous fix (chowning /proc/self/fd/*)
was dangerous because it chowned whatever file the FD pointed to (could
be the journal socket).
The proper fix is prctl(PR_SET_DUMPABLE, 1) after setuid, which restores
/proc/self/ ownership to the current uid.
Additionally, adds a `pipe-wrap` subcommand that wraps a child process
with piped stdout/stderr, relaying to the original FDs. This ensures all
descendants inherit pipes (which support re-opening via /proc/self/fd/N)
even when the outermost FDs are journal sockets. container-runtime.service
now uses this wrapper.
With pipe-wrap guaranteeing pipe-based FDs, the exec and launch non-TTY
paths no longer need their own pipe+relay threads, eliminating the bug
where exec would hang when a child daemonized (e.g. pg_ctl start).
This commit is contained in:
@@ -5,7 +5,7 @@ OnFailure=container-runtime-failure.service
|
||||
[Service]
|
||||
Type=simple
|
||||
Environment=RUST_LOG=startos=debug
|
||||
ExecStart=/usr/bin/node --experimental-detect-module --trace-warnings /usr/lib/startos/init/index.js
|
||||
ExecStart=/usr/bin/start-container pipe-wrap /usr/bin/node --experimental-detect-module --trace-warnings /usr/lib/startos/init/index.js
|
||||
Restart=no
|
||||
|
||||
[Install]
|
||||
|
||||
@@ -125,6 +125,10 @@ pub fn handler<C: Context>() -> ParentHandler<C> {
|
||||
.with_call_remote::<ContainerCliContext>(),
|
||||
),
|
||||
)
|
||||
.subcommand(
|
||||
"pipe-wrap",
|
||||
from_fn_blocking(subcontainer::pipe_wrap).no_display(),
|
||||
)
|
||||
// net
|
||||
.subcommand("bind", from_fn_async(net::bind::bind).no_cli())
|
||||
.subcommand(
|
||||
|
||||
@@ -487,28 +487,14 @@ pub fn launch(
|
||||
}
|
||||
cmd.arg(&chroot);
|
||||
cmd.args(&command);
|
||||
cmd.stdin(Stdio::piped());
|
||||
cmd.stdout(Stdio::piped());
|
||||
cmd.stderr(Stdio::piped());
|
||||
let mut child = cmd
|
||||
.spawn()
|
||||
.map_err(color_eyre::eyre::Report::msg)
|
||||
.with_ctx(|_| (ErrorKind::Filesystem, "spawning child process"))?;
|
||||
send_pid.send(child.id() as i32).unwrap_or_default();
|
||||
stdin_send
|
||||
.send(Box::new(child.stdin.take().unwrap()))
|
||||
.unwrap_or_default();
|
||||
stdout_send
|
||||
.send(Box::new(child.stdout.take().unwrap()))
|
||||
.unwrap_or_default();
|
||||
stderr_send
|
||||
.send(Box::new(child.stderr.take().unwrap()))
|
||||
.unwrap_or_default();
|
||||
let exit = child
|
||||
.wait()
|
||||
.with_ctx(|_| (ErrorKind::Filesystem, "waiting on child process"))?;
|
||||
stdout_thread.join().unwrap();
|
||||
stderr_thread.map(|t| t.join().unwrap());
|
||||
if let Some(code) = exit.code() {
|
||||
nix::mount::umount(&chroot.join("proc"))
|
||||
.with_ctx(|_| (ErrorKind::Filesystem, "umount procfs"))?;
|
||||
@@ -767,3 +753,96 @@ pub fn exec(
|
||||
pub fn exec_command(_: ContainerCliContext, params: ExecParams) -> Result<(), Error> {
|
||||
params.exec()
|
||||
}
|
||||
|
||||
/// Wrap a child process so that its stdout/stderr are always pipes, even when
|
||||
/// the wrapper's own FDs are sockets (e.g. systemd journal). This lets
|
||||
/// descendants `open("/dev/stderr")` via `/proc/self/fd/2` without ENXIO.
|
||||
pub fn pipe_wrap(
|
||||
_: ContainerCliContext,
|
||||
PipeWrapParams { command }: PipeWrapParams,
|
||||
) -> Result<(), Error> {
|
||||
use std::os::fd::AsRawFd;
|
||||
|
||||
let Some(([program], args)) = command.split_at_checked(1) else {
|
||||
return Err(Error::new(
|
||||
eyre!("pipe-wrap: command cannot be empty"),
|
||||
ErrorKind::InvalidRequest,
|
||||
));
|
||||
};
|
||||
|
||||
let mut cmd = StdCommand::new(program);
|
||||
cmd.args(args);
|
||||
cmd.stdout(Stdio::piped());
|
||||
cmd.stderr(Stdio::piped());
|
||||
|
||||
let mut child = cmd
|
||||
.spawn()
|
||||
.with_ctx(|_| (ErrorKind::Filesystem, "pipe-wrap: spawning child process"))?;
|
||||
|
||||
let child_stdout = child.stdout.take().unwrap();
|
||||
let child_stderr = child.stderr.take().unwrap();
|
||||
|
||||
let orig_stdout_fd = std::io::stdout().as_raw_fd();
|
||||
let orig_stderr_fd = std::io::stderr().as_raw_fd();
|
||||
|
||||
// Relay child stdout → original stdout (which may be a socket)
|
||||
std::thread::spawn(move || {
|
||||
let mut reader = child_stdout;
|
||||
let mut buf = [0u8; 8192];
|
||||
loop {
|
||||
match Read::read(&mut reader, &mut buf) {
|
||||
Ok(0) | Err(_) => break,
|
||||
Ok(n) => {
|
||||
let _ = nix::unistd::write(
|
||||
unsafe { std::os::fd::BorrowedFd::borrow_raw(orig_stdout_fd) },
|
||||
&buf[..n],
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Relay child stderr → original stderr
|
||||
std::thread::spawn(move || {
|
||||
let mut reader = child_stderr;
|
||||
let mut buf = [0u8; 8192];
|
||||
loop {
|
||||
match Read::read(&mut reader, &mut buf) {
|
||||
Ok(0) | Err(_) => break,
|
||||
Ok(n) => {
|
||||
let _ = nix::unistd::write(
|
||||
unsafe { std::os::fd::BorrowedFd::borrow_raw(orig_stderr_fd) },
|
||||
&buf[..n],
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Forward signals to the child
|
||||
let child_pid = child.id() as i32;
|
||||
let mut sig = signal_hook::iterator::Signals::new(FWD_SIGNALS)?;
|
||||
std::thread::spawn(move || {
|
||||
for sig in sig.forever() {
|
||||
match nix::sys::signal::kill(
|
||||
Pid::from_raw(child_pid),
|
||||
Some(nix::sys::signal::Signal::try_from(sig).unwrap()),
|
||||
) {
|
||||
Err(Errno::ESRCH) => break,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let status = child
|
||||
.wait()
|
||||
.with_ctx(|_| (ErrorKind::Filesystem, "pipe-wrap: waiting on child"))?;
|
||||
std::process::exit(status.code().unwrap_or(1))
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Parser)]
|
||||
#[group(skip)]
|
||||
pub struct PipeWrapParams {
|
||||
#[arg(trailing_var_arg = true, help = "help.arg.command-to-execute")]
|
||||
command: Vec<OsString>,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user