From d708ae53dfd2efdcefdd0cc433c171bc7110f09a Mon Sep 17 00:00:00 2001 From: Aiden McClelland Date: Fri, 9 Jan 2026 10:53:58 -0700 Subject: [PATCH] handle nvidia-container differently --- core/src/context/rpc.rs | 92 +-------------- core/src/disk/mount/filesystem/overlayfs.rs | 29 +++++ core/src/service/effects/subcontainer/mod.rs | 114 ++++++++++++++----- 3 files changed, 116 insertions(+), 119 deletions(-) diff --git a/core/src/context/rpc.rs b/core/src/context/rpc.rs index 3113dd2a4..f8be8af19 100644 --- a/core/src/context/rpc.rs +++ b/core/src/context/rpc.rs @@ -15,7 +15,6 @@ use josekit::jwk::Jwk; use reqwest::{Client, Proxy}; use rpc_toolkit::yajrc::RpcError; use rpc_toolkit::{CallRemote, Context, Empty}; -use tokio::process::Command; use tokio::sync::{RwLock, broadcast, oneshot, watch}; use tokio::time::Instant; use tracing::instrument; @@ -27,11 +26,6 @@ use crate::context::config::ServerConfig; use crate::db::model::Database; use crate::db::model::package::TaskSeverity; use crate::disk::OsPartitionInfo; -use crate::disk::mount::filesystem::bind::Bind; -use crate::disk::mount::filesystem::block_dev::BlockDev; -use crate::disk::mount::filesystem::loop_dev::LoopDev; -use crate::disk::mount::filesystem::{FileSystem, ReadOnly}; -use crate::disk::mount::guard::MountGuard; use crate::init::{InitResult, check_time_is_synchronized}; use crate::install::PKG_ARCHIVE_DIR; use crate::lxc::LxcManager; @@ -47,14 +41,12 @@ use crate::rpc_continuations::{Guid, OpenAuthedContinuations, RpcContinuations}; use crate::service::ServiceMap; use crate::service::action::update_tasks; use crate::service::effects::callbacks::ServiceCallbacks; -use crate::service::effects::subcontainer::NVIDIA_OVERLAY_PATH; use crate::shutdown::Shutdown; -use crate::util::Invoke; use crate::util::future::NonDetachingJoinHandle; -use crate::util::io::{TmpDir, delete_file}; +use crate::util::io::delete_file; use crate::util::lshw::LshwDevice; use crate::util::sync::{SyncMutex, SyncRwLock, Watch}; -use crate::{ActionId, DATA_DIR, PLATFORM, PackageId}; +use crate::{ActionId, DATA_DIR, PackageId}; pub struct RpcContextSeed { is_closed: AtomicBool, @@ -175,86 +167,6 @@ impl RpcContext { init_net_ctrl.complete(); tracing::info!("Initialized Net Controller"); - if PLATFORM.ends_with("-nonfree") { - if let Err(e) = Command::new("nvidia-smi") - .invoke(ErrorKind::ParseSysInfo) - .await - { - tracing::warn!("nvidia-modprobe: {e}"); - tracing::info!("The above warning can be ignored if no NVIDIA card is present"); - } else { - async { - let version: InternedString = String::from_utf8( - Command::new("modinfo") - .arg("-F") - .arg("version") - .arg("nvidia") - .invoke(ErrorKind::ParseSysInfo) - .await?, - )? - .trim() - .into(); - let sqfs = Path::new("/media/startos/data/package-data/nvidia") - .join(&*version) - .join("container-overlay.squashfs"); - if tokio::fs::metadata(&sqfs).await.is_err() { - let tmp = TmpDir::new().await?; - let procfs = MountGuard::mount( - &Bind::new("/proc"), - Path::new(&*tmp).join("proc"), - ReadOnly, - ) - .await?; - Command::new("nvidia-container-cli") - .arg("configure") - .arg("--no-devbind") - .arg("--no-cgroups") - .arg("--utility") - .arg("--compute") - .arg("--graphics") - .arg("--video") - .arg(&*tmp) - .invoke(ErrorKind::Unknown) - .await?; - procfs.unmount(true).await?; - Command::new("ln") - .arg("-rsf") - .arg( - tmp.join("usr/lib64/libnvidia-ml.so") - .with_added_extension(&*version), - ) - .arg(tmp.join("usr/lib64/libnvidia-ml.so.1")) - .invoke(ErrorKind::Filesystem) - .await?; - Command::new("chown") - .arg("-R") - .arg("100000:100000") - .arg(&*tmp) - .invoke(ErrorKind::Filesystem) - .await?; - if let Some(p) = sqfs.parent() { - tokio::fs::create_dir_all(p) - .await - .with_ctx(|_| (ErrorKind::Filesystem, format!("mkdir -p {p:?}")))?; - } - Command::new("mksquashfs") - .arg(&*tmp) - .arg(&sqfs) - .invoke(ErrorKind::Filesystem) - .await?; - tmp.unmount_and_delete().await?; - } - BlockDev::new(&sqfs) - .mount(NVIDIA_OVERLAY_PATH, ReadOnly) - .await?; - - Ok::<_, Error>(()) - } - .await - .log_err(); - } - } - let services = ServiceMap::default(); let metrics_cache = Watch::>::new(None); let socks_proxy_url = format!("socks5h://{socks_proxy}"); diff --git a/core/src/disk/mount/filesystem/overlayfs.rs b/core/src/disk/mount/filesystem/overlayfs.rs index 2a3125f0d..e0e50b562 100644 --- a/core/src/disk/mount/filesystem/overlayfs.rs +++ b/core/src/disk/mount/filesystem/overlayfs.rs @@ -147,6 +147,35 @@ impl OverlayGuard { } Ok(()) } + /// Remounts the overlay at a new location. The old mountpoint is unmounted first. + pub async fn remount(&mut self, new_mountpoint: impl AsRef) -> Result<(), Error> { + let lower = self.lower.as_ref().ok_or_else(|| { + Error::new( + eyre!("OverlayGuard has no lower layer"), + crate::ErrorKind::Incoherent, + ) + })?; + let upper = self.upper.as_ref().ok_or_else(|| { + Error::new( + eyre!("OverlayGuard has no upper layer"), + crate::ErrorKind::Incoherent, + ) + })?; + // Unmount from current location + self.inner_guard.take().unmount(true).await?; + // Remount at new location + self.inner_guard = MountGuard::mount( + &OverlayFs::new( + vec![lower.path()], + upper.as_ref().join("upper"), + upper.as_ref().join("work"), + ), + new_mountpoint, + ReadWrite, + ) + .await?; + Ok(()) + } pub fn take(&mut self) -> Self { Self { lower: self.lower.take(), diff --git a/core/src/service/effects/subcontainer/mod.rs b/core/src/service/effects/subcontainer/mod.rs index 765218ea7..46a014b79 100644 --- a/core/src/service/effects/subcontainer/mod.rs +++ b/core/src/service/effects/subcontainer/mod.rs @@ -4,15 +4,15 @@ use imbl_value::InternedString; use tokio::process::Command; use crate::ImageId; +use crate::disk::mount::filesystem::bind::Bind; use crate::disk::mount::filesystem::overlayfs::OverlayGuard; -use crate::disk::mount::guard::GenericMountGuard; +use crate::disk::mount::filesystem::ReadOnly; +use crate::disk::mount::guard::{GenericMountGuard, MountGuard, TMP_MOUNTPOINT}; use crate::rpc_continuations::Guid; use crate::service::effects::prelude::*; use crate::service::persistent_container::Subcontainer; use crate::util::Invoke; -pub const NVIDIA_OVERLAY_PATH: &str = "/var/tmp/startos/nvidia-overlay"; - #[cfg(target_os = "linux")] mod sync; @@ -104,37 +104,93 @@ pub async fn create_subcontainer_fs( ) })? .rootfs_dir(); - let mountpoint = rootfs_dir + let final_mountpoint = rootfs_dir .join("media/startos/subcontainers") .join(guid.as_ref()); - tokio::fs::create_dir_all(&mountpoint).await?; + tokio::fs::create_dir_all(&final_mountpoint).await?; let container_mountpoint = Path::new("/").join( - mountpoint - .strip_prefix(rootfs_dir) + final_mountpoint + .strip_prefix(&rootfs_dir) .with_kind(ErrorKind::Incoherent)?, ); - tracing::info!("Mounting overlay {guid} for {image_id}"); + + let nvidia_container = context + .seed + .persistent_container + .s9pk + .as_manifest() + .images + .get(&image_id) + .map_or(false, |i| i.nvidia_container); + + // If nvidia_container is enabled, we need to stage the overlay outside the LXC rootfs + // to safely mount /proc for nvidia-container-cli without exposing it to the container + let overlay = if nvidia_container { + // Create staging directory outside LXC rootfs + let staging_dir = Path::new(TMP_MOUNTPOINT) + .join("nvidia-staging") + .join(guid.as_ref()); + tokio::fs::create_dir_all(&staging_dir).await?; + + tracing::info!("Mounting overlay {guid} for {image_id} at staging location"); + let mut overlay = OverlayGuard::mount(image, &staging_dir).await?; + + // Mount /proc temporarily for nvidia-container-cli (outside LXC rootfs) + let staging_proc = staging_dir.join("proc"); + tokio::fs::create_dir_all(&staging_proc).await?; + let proc_mount = MountGuard::mount(&Bind::new("/proc"), &staging_proc, ReadOnly).await?; + + // Read environment variables from the image's env file + let env_file = rootfs_dir + .join("media/startos/images") + .join(image_id.as_ref()) + .with_extension("env"); + let env_content = tokio::fs::read_to_string(&env_file) + .await + .unwrap_or_default(); + + // Build nvidia-container-cli command with environment variables + let mut cmd = Command::new("nvidia-container-cli"); + cmd.arg("configure") + .arg("--no-cgroups") + .arg("--utility") + .arg("--compute") + .arg("--graphics") + .arg("--video"); + + // Pass NVIDIA_* environment variables to nvidia-container-cli + for line in env_content.lines() { + if let Some((key, value)) = line.split_once('=') { + if key.starts_with("NVIDIA_") { + cmd.env(key, value); + } + } + } + + cmd.arg(&staging_dir); + + tracing::info!("Running nvidia-container-cli for {image_id}"); + cmd.invoke(ErrorKind::Unknown).await?; + + // Unmount /proc + proc_mount.unmount(false).await?; + tracing::info!("nvidia-container-cli completed for {image_id}"); + + // Remount overlay at final location inside LXC rootfs + tracing::info!("Remounting overlay {guid} to final location"); + overlay.remount(&final_mountpoint).await?; + + // Clean up staging directory + tokio::fs::remove_dir_all(&staging_dir).await.ok(); + + overlay + } else { + tracing::info!("Mounting overlay {guid} for {image_id}"); + OverlayGuard::mount(image, &final_mountpoint).await? + }; + let subcontainer_wrapper = Subcontainer { - overlay: OverlayGuard::mount_layers( - &[], - image, - if context - .seed - .persistent_container - .s9pk - .as_manifest() - .images - .get(&image_id) - .map_or(false, |i| i.nvidia_container) - && tokio::fs::metadata(NVIDIA_OVERLAY_PATH).await.is_ok() - { - &[NVIDIA_OVERLAY_PATH] - } else { - &[] - }, - &mountpoint, - ) - .await?, + overlay, name: name .unwrap_or_else(|| InternedString::intern(format!("subcontainer-{}", image_id))), image_id: image_id.clone(), @@ -142,7 +198,7 @@ pub async fn create_subcontainer_fs( Command::new("chown") .arg("100000:100000") - .arg(&mountpoint) + .arg(&final_mountpoint) .invoke(ErrorKind::Filesystem) .await?; tracing::info!("Mounted overlay {guid} for {image_id}");