Fix NVIDIA container overlay for Debian-based containers

This commit is contained in:
Aiden McClelland
2026-01-13 22:50:03 -07:00
parent 0ce125b72c
commit 97575bb25b
2 changed files with 86 additions and 39 deletions

View File

@@ -180,7 +180,7 @@ impl RpcContext {
.invoke(ErrorKind::ParseSysInfo) .invoke(ErrorKind::ParseSysInfo)
.await .await
{ {
tracing::warn!("nvidia-modprobe: {e}"); tracing::warn!("nvidia-smi: {e}");
tracing::info!("The above warning can be ignored if no NVIDIA card is present"); tracing::info!("The above warning can be ignored if no NVIDIA card is present");
} else { } else {
async { async {
@@ -194,14 +194,24 @@ impl RpcContext {
)? )?
.trim() .trim()
.into(); .into();
let sqfs = Path::new("/media/startos/data/package-data/nvidia")
.join(&*version) let nvidia_dir =
.join("container-overlay.squashfs"); Path::new("/media/startos/data/package-data/nvidia").join(&*version);
// Generate single squashfs with both debian and generic overlays
let sqfs = nvidia_dir.join("container-overlay.squashfs");
if tokio::fs::metadata(&sqfs).await.is_err() { if tokio::fs::metadata(&sqfs).await.is_err() {
let tmp = TmpDir::new().await?; let tmp = TmpDir::new().await?;
// Generate debian overlay (libs in /usr/lib/aarch64-linux-gnu/)
let debian_dir = tmp.join("debian");
tokio::fs::create_dir_all(&debian_dir).await?;
// Create /etc/debian_version to trigger debian path detection
tokio::fs::create_dir_all(debian_dir.join("etc")).await?;
tokio::fs::write(debian_dir.join("etc/debian_version"), "").await?;
let procfs = MountGuard::mount( let procfs = MountGuard::mount(
&Bind::new("/proc"), &Bind::new("/proc"),
Path::new(&*tmp).join("proc"), debian_dir.join("proc"),
ReadOnly, ReadOnly,
) )
.await?; .await?;
@@ -213,25 +223,49 @@ impl RpcContext {
.arg("--compute") .arg("--compute")
.arg("--graphics") .arg("--graphics")
.arg("--video") .arg("--video")
.arg(&*tmp) .arg(&debian_dir)
.invoke(ErrorKind::Unknown) .invoke(ErrorKind::Unknown)
.await?; .await?;
procfs.unmount(true).await?; procfs.unmount(true).await?;
Command::new("ln") // Run ldconfig to create proper symlinks for all NVIDIA libraries
.arg("-rsf") Command::new("ldconfig")
.arg( .arg("-r")
tmp.join("usr/lib64/libnvidia-ml.so") .arg(&debian_dir)
.with_added_extension(&*version), .invoke(ErrorKind::Unknown)
)
.arg(tmp.join("usr/lib64/libnvidia-ml.so.1"))
.invoke(ErrorKind::Filesystem)
.await?; .await?;
Command::new("chown") // Remove /etc/debian_version - it was only needed for nvidia-container-cli detection
.arg("-R") tokio::fs::remove_file(debian_dir.join("etc/debian_version")).await?;
.arg("100000:100000")
.arg(&*tmp) // Generate generic overlay (libs in /usr/lib64/)
.invoke(ErrorKind::Filesystem) let generic_dir = tmp.join("generic");
tokio::fs::create_dir_all(&generic_dir).await?;
// No /etc/debian_version - will use generic /usr/lib64 paths
let procfs = MountGuard::mount(
&Bind::new("/proc"),
generic_dir.join("proc"),
ReadOnly,
)
.await?;
Command::new("nvidia-container-cli")
.arg("configure")
.arg("--no-devbind")
.arg("--no-cgroups")
.arg("--utility")
.arg("--compute")
.arg("--graphics")
.arg("--video")
.arg(&generic_dir)
.invoke(ErrorKind::Unknown)
.await?; .await?;
procfs.unmount(true).await?;
// Run ldconfig to create proper symlinks for all NVIDIA libraries
Command::new("ldconfig")
.arg("-r")
.arg(&generic_dir)
.invoke(ErrorKind::Unknown)
.await?;
// Create squashfs with UID/GID mapping (avoids chown on readonly mounts)
if let Some(p) = sqfs.parent() { if let Some(p) = sqfs.parent() {
tokio::fs::create_dir_all(p) tokio::fs::create_dir_all(p)
.await .await
@@ -240,6 +274,10 @@ impl RpcContext {
Command::new("mksquashfs") Command::new("mksquashfs")
.arg(&*tmp) .arg(&*tmp)
.arg(&sqfs) .arg(&sqfs)
.arg("-force-uid")
.arg("100000")
.arg("-force-gid")
.arg("100000")
.invoke(ErrorKind::Filesystem) .invoke(ErrorKind::Filesystem)
.await?; .await?;
tmp.unmount_and_delete().await?; tmp.unmount_and_delete().await?;

View File

@@ -12,6 +12,8 @@ use crate::service::persistent_container::Subcontainer;
use crate::util::Invoke; use crate::util::Invoke;
pub const NVIDIA_OVERLAY_PATH: &str = "/var/tmp/startos/nvidia-overlay"; pub const NVIDIA_OVERLAY_PATH: &str = "/var/tmp/startos/nvidia-overlay";
/// Overlay directory tried first for NVIDIA-enabled images that contain `/etc/debian_version`.
// NOTE(review): presumably the `debian` subtree of the squashfs mounted at `NVIDIA_OVERLAY_PATH` — confirm against the mount site.
pub const NVIDIA_OVERLAY_DEBIAN: &str = "/var/tmp/startos/nvidia-overlay/debian";
/// Overlay directory used for NVIDIA-enabled images that are not detected as Debian-based,
/// and as the fallback when the Debian overlay directory does not exist.
pub const NVIDIA_OVERLAY_GENERIC: &str = "/var/tmp/startos/nvidia-overlay/generic";
#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
mod sync; mod sync;
@@ -114,27 +116,34 @@ pub async fn create_subcontainer_fs(
.with_kind(ErrorKind::Incoherent)?, .with_kind(ErrorKind::Incoherent)?,
); );
tracing::info!("Mounting overlay {guid} for {image_id}"); tracing::info!("Mounting overlay {guid} for {image_id}");
// Determine which nvidia overlay to use based on distro detection
let nvidia_overlay: &[&str] = if context
.seed
.persistent_container
.s9pk
.as_manifest()
.images
.get(&image_id)
.map_or(false, |i| i.nvidia_container)
{
// Check if image is debian-based by looking for /etc/debian_version
let is_debian = tokio::fs::metadata(image.path().join("etc/debian_version"))
.await
.is_ok();
if is_debian && tokio::fs::metadata(NVIDIA_OVERLAY_DEBIAN).await.is_ok() {
&[NVIDIA_OVERLAY_DEBIAN]
} else if tokio::fs::metadata(NVIDIA_OVERLAY_GENERIC).await.is_ok() {
&[NVIDIA_OVERLAY_GENERIC]
} else {
&[]
}
} else {
&[]
};
let subcontainer_wrapper = Subcontainer { let subcontainer_wrapper = Subcontainer {
overlay: OverlayGuard::mount_layers( overlay: OverlayGuard::mount_layers(&[], image, nvidia_overlay, &mountpoint).await?,
&[],
image,
if context
.seed
.persistent_container
.s9pk
.as_manifest()
.images
.get(&image_id)
.map_or(false, |i| i.nvidia_container)
&& tokio::fs::metadata(NVIDIA_OVERLAY_PATH).await.is_ok()
{
&[NVIDIA_OVERLAY_PATH]
} else {
&[]
},
&mountpoint,
)
.await?,
name: name name: name
.unwrap_or_else(|| InternedString::intern(format!("subcontainer-{}", image_id))), .unwrap_or_else(|| InternedString::intern(format!("subcontainer-{}", image_id))),
image_id: image_id.clone(), image_id: image_id.clone(),