fix debian containers

This commit is contained in:
Aiden McClelland
2026-01-13 22:50:03 -07:00
parent 0ce125b72c
commit 97575bb25b
2 changed files with 86 additions and 39 deletions

View File

@@ -180,7 +180,7 @@ impl RpcContext {
.invoke(ErrorKind::ParseSysInfo)
.await
{
tracing::warn!("nvidia-modprobe: {e}");
tracing::warn!("nvidia-smi: {e}");
tracing::info!("The above warning can be ignored if no NVIDIA card is present");
} else {
async {
@@ -194,14 +194,24 @@ impl RpcContext {
)?
.trim()
.into();
let sqfs = Path::new("/media/startos/data/package-data/nvidia")
.join(&*version)
.join("container-overlay.squashfs");
let nvidia_dir =
Path::new("/media/startos/data/package-data/nvidia").join(&*version);
// Generate single squashfs with both debian and generic overlays
let sqfs = nvidia_dir.join("container-overlay.squashfs");
if tokio::fs::metadata(&sqfs).await.is_err() {
let tmp = TmpDir::new().await?;
// Generate debian overlay (libs in the Debian multiarch dir, e.g. /usr/lib/aarch64-linux-gnu/)
let debian_dir = tmp.join("debian");
tokio::fs::create_dir_all(&debian_dir).await?;
// Create /etc/debian_version to trigger debian path detection
tokio::fs::create_dir_all(debian_dir.join("etc")).await?;
tokio::fs::write(debian_dir.join("etc/debian_version"), "").await?;
let procfs = MountGuard::mount(
&Bind::new("/proc"),
Path::new(&*tmp).join("proc"),
debian_dir.join("proc"),
ReadOnly,
)
.await?;
@@ -213,25 +223,49 @@ impl RpcContext {
.arg("--compute")
.arg("--graphics")
.arg("--video")
.arg(&*tmp)
.arg(&debian_dir)
.invoke(ErrorKind::Unknown)
.await?;
procfs.unmount(true).await?;
Command::new("ln")
.arg("-rsf")
.arg(
tmp.join("usr/lib64/libnvidia-ml.so")
.with_added_extension(&*version),
)
.arg(tmp.join("usr/lib64/libnvidia-ml.so.1"))
.invoke(ErrorKind::Filesystem)
// Run ldconfig to create proper symlinks for all NVIDIA libraries
Command::new("ldconfig")
.arg("-r")
.arg(&debian_dir)
.invoke(ErrorKind::Unknown)
.await?;
Command::new("chown")
.arg("-R")
.arg("100000:100000")
.arg(&*tmp)
.invoke(ErrorKind::Filesystem)
// Remove /etc/debian_version - it was only needed for nvidia-container-cli detection
tokio::fs::remove_file(debian_dir.join("etc/debian_version")).await?;
// Generate generic overlay (libs in /usr/lib64/)
let generic_dir = tmp.join("generic");
tokio::fs::create_dir_all(&generic_dir).await?;
// No /etc/debian_version - will use generic /usr/lib64 paths
let procfs = MountGuard::mount(
&Bind::new("/proc"),
generic_dir.join("proc"),
ReadOnly,
)
.await?;
Command::new("nvidia-container-cli")
.arg("configure")
.arg("--no-devbind")
.arg("--no-cgroups")
.arg("--utility")
.arg("--compute")
.arg("--graphics")
.arg("--video")
.arg(&generic_dir)
.invoke(ErrorKind::Unknown)
.await?;
procfs.unmount(true).await?;
// Run ldconfig to create proper symlinks for all NVIDIA libraries
Command::new("ldconfig")
.arg("-r")
.arg(&generic_dir)
.invoke(ErrorKind::Unknown)
.await?;
// Create squashfs with every file's owner forced to 100000:100000 (avoids chown on readonly mounts)
if let Some(p) = sqfs.parent() {
tokio::fs::create_dir_all(p)
.await
@@ -240,6 +274,10 @@ impl RpcContext {
Command::new("mksquashfs")
.arg(&*tmp)
.arg(&sqfs)
.arg("-force-uid")
.arg("100000")
.arg("-force-gid")
.arg("100000")
.invoke(ErrorKind::Filesystem)
.await?;
tmp.unmount_and_delete().await?;

View File

@@ -12,6 +12,8 @@ use crate::service::persistent_container::Subcontainer;
use crate::util::Invoke;
/// Base directory of the NVIDIA container overlay; the `debian` and `generic`
/// variants are subdirectories of it.
/// NOTE(review): presumably where `container-overlay.squashfs` gets mounted — confirm.
pub const NVIDIA_OVERLAY_PATH: &str = "/var/tmp/startos/nvidia-overlay";
/// Overlay variant layered onto images that contain `etc/debian_version`.
pub const NVIDIA_OVERLAY_DEBIAN: &str = "/var/tmp/startos/nvidia-overlay/debian";
/// Overlay variant used for non-Debian images, and as the fallback when the
/// Debian variant is not present on disk.
pub const NVIDIA_OVERLAY_GENERIC: &str = "/var/tmp/startos/nvidia-overlay/generic";
#[cfg(target_os = "linux")]
mod sync;
@@ -114,27 +116,34 @@ pub async fn create_subcontainer_fs(
.with_kind(ErrorKind::Incoherent)?,
);
tracing::info!("Mounting overlay {guid} for {image_id}");
// Determine which nvidia overlay to use based on distro detection
let nvidia_overlay: &[&str] = if context
.seed
.persistent_container
.s9pk
.as_manifest()
.images
.get(&image_id)
.map_or(false, |i| i.nvidia_container)
{
// Check if image is debian-based by looking for /etc/debian_version
let is_debian = tokio::fs::metadata(image.path().join("etc/debian_version"))
.await
.is_ok();
if is_debian && tokio::fs::metadata(NVIDIA_OVERLAY_DEBIAN).await.is_ok() {
&[NVIDIA_OVERLAY_DEBIAN]
} else if tokio::fs::metadata(NVIDIA_OVERLAY_GENERIC).await.is_ok() {
&[NVIDIA_OVERLAY_GENERIC]
} else {
&[]
}
} else {
&[]
};
let subcontainer_wrapper = Subcontainer {
overlay: OverlayGuard::mount_layers(
&[],
image,
if context
.seed
.persistent_container
.s9pk
.as_manifest()
.images
.get(&image_id)
.map_or(false, |i| i.nvidia_container)
&& tokio::fs::metadata(NVIDIA_OVERLAY_PATH).await.is_ok()
{
&[NVIDIA_OVERLAY_PATH]
} else {
&[]
},
&mountpoint,
)
.await?,
overlay: OverlayGuard::mount_layers(&[], image, nvidia_overlay, &mountpoint).await?,
name: name
.unwrap_or_else(|| InternedString::intern(format!("subcontainer-{}", image_id))),
image_id: image_id.clone(),