diff --git a/NVIDIA.md b/NVIDIA.md
deleted file mode 100644
index a854bfdac..000000000
--- a/NVIDIA.md
+++ /dev/null
@@ -1,6 +0,0 @@
-install:
-dkms
-linux-headers-$(uname-r)
-
-then run:
-https://us.download.nvidia.com/XFree86/aarch64/580.119.02/NVIDIA-Linux-aarch64-580.119.02.run
diff --git a/build/dpkg-deps/depends b/build/dpkg-deps/depends
index 50266ec71..9491a1d48 100644
--- a/build/dpkg-deps/depends
+++ b/build/dpkg-deps/depends
@@ -16,6 +16,7 @@ dnsutils
 dosfstools
 e2fsprogs
 ecryptfs-utils
+equivs
 exfatprogs
 flashrom
 fuse3
diff --git a/build/image-recipe/build.sh b/build/image-recipe/build.sh
index 2aa9ccf03..2e4458587 100755
--- a/build/image-recipe/build.sh
+++ b/build/image-recipe/build.sh
@@ -203,16 +203,14 @@ cat > config/hooks/normal/9000-install-startos.hook.chroot << EOF
 set -e
 
 if [ "${NON_FREE}" = "1" ]; then
-	# install latest NVIDIA driver in a given major series
+	# install a specific NVIDIA driver version
 
 	# ---------------- configuration ----------------
-
-	# Major driver series, e.g. 550, 560, 570
-	NVIDIA_MAJOR="\${NVIDIA_MAJOR:-580}"
+	NVIDIA_DRIVER_VERSION="\${NVIDIA_DRIVER_VERSION:-580.119.02}"
 
 	BASE_URL="https://download.nvidia.com/XFree86/Linux-${QEMU_ARCH}"
 
-	echo "[nvidia-hook] Using NVIDIA major series: \${NVIDIA_MAJOR}" >&2
+	echo "[nvidia-hook] Using NVIDIA driver: \${NVIDIA_DRIVER_VERSION}" >&2
 
 	# ---------------- kernel version ----------------
 
@@ -231,36 +229,26 @@ if [ "${NON_FREE}" = "1" ]; then
 	echo "[nvidia-hook] Target kernel version: \${KVER}" >&2
 
 	# Ensure kernel headers are present
+	TEMP_APT_DEPS=(build-essential)
 	if [ ! -e "/lib/modules/\${KVER}/build" ]; then
-		apt-get install linux-headers-\${KVER}
-		echo "[nvidia-hook] ERROR: /lib/modules/\${KVER}/build missing; install headers for \${KVER} before this hook." >&2
-		exit 1
+		TEMP_APT_DEPS+=(linux-headers-\${KVER})
 	fi
 
-	# ---------------- find latest driver in major series ----------------
+	echo "[nvidia-hook] Installing build dependencies" >&2
 
-	echo "[nvidia-hook] Fetching version list from \${BASE_URL}/" >&2
-
-	NVIDIA_VER="\$(
-		curl -fsSL "\${BASE_URL}/" \
-			| sed -n 's/.*href="([0-9][0-9][0-9][0-9.]*)/".*/\u0001/p' \
-			| grep -E "^\${NVIDIA_MAJOR}." \
-			| sort -V \
-			| tail -n1
-	)"
-
-	if [ -z "\${NVIDIA_VER}" ]; then
-		echo "[nvidia-hook] ERROR: could not find NVIDIA series \${NVIDIA_MAJOR} under \${BASE_URL}/" >&2
-		exit 1
-	fi
-
-	echo "[nvidia-hook] Selected NVIDIA driver version: \${NVIDIA_VER}" >&2
+	/usr/lib/startos/scripts/install-equivs <<-EOF
+	Package: nvidia-depends
+	Version: \${NVIDIA_DRIVER_VERSION}
+	Section: unknown
+	Priority: optional
+	Depends: \${dep_list="\$(IFS=', '; echo "\${TEMP_APT_DEPS[*]}")"}
+	EOF
 
 	# ---------------- download and run installer ----------------
 
-	RUN_NAME="NVIDIA-Linux-${QEMU_ARCH}-\${NVIDIA_VER}.run"
+	RUN_NAME="NVIDIA-Linux-${QEMU_ARCH}-\${NVIDIA_DRIVER_VERSION}.run"
 	RUN_PATH="/root/\${RUN_NAME}"
-	RUN_URL="\${BASE_URL}/\${NVIDIA_VER}/\${RUN_NAME}"
+	RUN_URL="\${BASE_URL}/\${NVIDIA_DRIVER_VERSION}/\${RUN_NAME}"
 
 	echo "[nvidia-hook] Downloading \${RUN_URL}" >&2
 	wget -O "\${RUN_PATH}" "\${RUN_URL}"
@@ -279,7 +267,12 @@ if [ "${NON_FREE}" = "1" ]; then
 	echo "[nvidia-hook] Running depmod for \${KVER}" >&2
 	depmod -a "\${KVER}"
 
-	echo "[nvidia-hook] NVIDIA \${NVIDIA_VER} installation complete for kernel \${KVER}" >&2
+	echo "[nvidia-hook] NVIDIA \${NVIDIA_DRIVER_VERSION} installation complete for kernel \${KVER}" >&2
+
+	echo "[nvidia-hook] Removing build dependencies..." >&2
+	apt-get purge -y nvidia-depends
+	apt-get autoremove -y
+	echo "[nvidia-hook] Removed build dependencies." >&2
 fi
 
 cp /etc/resolv.conf /etc/resolv.conf.bak
diff --git a/build/lib/scripts/install-equivs b/build/lib/scripts/install-equivs
new file mode 100755
index 000000000..610881093
--- /dev/null
+++ b/build/lib/scripts/install-equivs
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Reads an equivs control file on stdin, builds the package it describes in a
+# scratch directory and installs the result with apt-get.
+# Exits non-zero if any of those steps fail.
+
+export DEBIAN_FRONTEND=noninteractive
+export DEBCONF_NONINTERACTIVE_SEEN=true
+
+TMP_DIR=$(mktemp -d) || exit 1
+
+# Run the build in a subshell: `set -e` aborts it on the first failure while
+# the cleanup below still runs in the parent shell.
+(
+    set -e
+    cd "$TMP_DIR"
+
+    cat > control.equivs
+    equivs-build control.equivs
+    apt-get install -y ./*.deb < /dev/null
+)
+STATUS=$?
+
+rm -rf "$TMP_DIR"
+
+if [ "$STATUS" -ne 0 ]; then
+    echo "Install failed (exit status $STATUS)." >&2
+    exit "$STATUS"
+fi
+
+echo Install complete. >&2
+exit 0
diff --git a/container-runtime/update-image-local.sh b/container-runtime/update-image-local.sh
index 0e790c655..20dc7a9ef 100755
--- a/container-runtime/update-image-local.sh
+++ b/container-runtime/update-image-local.sh
@@ -15,4 +15,7 @@ case $ARCH in
         DOCKER_PLATFORM=linux/arm64;;
 esac
 
-docker run --rm $USE_TTY --platform=$DOCKER_PLATFORM -eARCH --privileged -v "$(pwd):/root/start-os" start9/build-env /root/start-os/container-runtime/update-image.sh
\ No newline at end of file
+docker run --rm $USE_TTY --platform=$DOCKER_PLATFORM -eARCH --privileged -v "$(pwd):/root/start-os" start9/build-env /root/start-os/container-runtime/update-image.sh
+if [ "$(ls -nd "rootfs.${ARCH}.squashfs" | awk '{ print $3 }')" != "$UID" ]; then
+    docker run --rm $USE_TTY -v "$(pwd):/root/start-os" start9/build-env chown -R $UID:$UID /root/start-os/container-runtime
+fi
\ No newline at end of file
diff --git a/core/src/context/rpc.rs b/core/src/context/rpc.rs
index b0caad80d..3113dd2a4 100644
--- a/core/src/context/rpc.rs
+++ b/core/src/context/rpc.rs
@@ -27,8 +27,10 @@ use crate::context::config::ServerConfig;
 use crate::db::model::Database;
 use crate::db::model::package::TaskSeverity;
 use crate::disk::OsPartitionInfo;
-use crate::disk::mount::filesystem::ReadOnly;
 use crate::disk::mount::filesystem::bind::Bind;
+use crate::disk::mount::filesystem::block_dev::BlockDev;
+use crate::disk::mount::filesystem::loop_dev::LoopDev;
+use crate::disk::mount::filesystem::{FileSystem, ReadOnly};
 use crate::disk::mount::guard::MountGuard;
 use crate::init::{InitResult, check_time_is_synchronized};
 use crate::install::PKG_ARCHIVE_DIR;
@@ -49,7 +51,7 @@ use crate::service::effects::subcontainer::NVIDIA_OVERLAY_PATH;
 use crate::shutdown::Shutdown;
 use crate::util::Invoke;
 use crate::util::future::NonDetachingJoinHandle;
-use crate::util::io::delete_file;
+use crate::util::io::{TmpDir, delete_file};
 use crate::util::lshw::LshwDevice;
 use crate::util::sync::{SyncMutex, SyncRwLock, Watch};
 use crate::{ActionId, DATA_DIR, PLATFORM, PackageId};
@@ -174,35 +176,90 @@ impl RpcContext {
         tracing::info!("Initialized Net Controller");
 
         if PLATFORM.ends_with("-nonfree") {
-            if let Err(e) = Command::new("nvidia-modprobe")
+            if let Err(e) = Command::new("nvidia-smi")
                 .invoke(ErrorKind::ParseSysInfo)
                 .await
             {
-                tracing::warn!("nvidia-modprobe: {e}");
+                tracing::warn!("nvidia-smi: {e}");
                 tracing::info!("The above warning can be ignored if no NVIDIA card is present");
             } else {
-                if let Some(procfs) = MountGuard::mount(
-                    &Bind::new("/proc"),
-                    Path::new(NVIDIA_OVERLAY_PATH).join("proc"),
-                    ReadOnly,
-                )
-                .await
-                .log_err()
-                {
-                    Command::new("nvidia-container-cli")
-                        .arg("configure")
-                        .arg("--no-devbind")
-                        .arg("--no-cgroups")
-                        .arg("--utility")
-                        .arg("--compute")
-                        .arg("--graphics")
-                        .arg("--video")
-                        .arg(NVIDIA_OVERLAY_PATH)
-                        .invoke(ErrorKind::Unknown)
-                        .await
-                        .log_err();
-                    procfs.unmount(true).await.log_err();
+                async {
+                    let version: InternedString = String::from_utf8(
+                        Command::new("modinfo")
+                            .arg("-F")
+                            .arg("version")
+                            .arg("nvidia")
+                            .invoke(ErrorKind::ParseSysInfo)
+                            .await?,
+                    )?
+                    .trim()
+                    .into();
+                    let sqfs = Path::new("/media/startos/data/package-data/nvidia")
+                        .join(&*version)
+                        .join("container-overlay.squashfs");
+                    if tokio::fs::metadata(&sqfs).await.is_err() {
+                        let tmp = TmpDir::new().await?;
+                        let procfs = MountGuard::mount(
+                            &Bind::new("/proc"),
+                            Path::new(&*tmp).join("proc"),
+                            ReadOnly,
+                        )
+                        .await?;
+                        Command::new("nvidia-container-cli")
+                            .arg("configure")
+                            .arg("--no-devbind")
+                            .arg("--no-cgroups")
+                            .arg("--utility")
+                            .arg("--compute")
+                            .arg("--graphics")
+                            .arg("--video")
+                            .arg(&*tmp)
+                            .invoke(ErrorKind::Unknown)
+                            .await?;
+                        procfs.unmount(true).await?;
+                        Command::new("ln")
+                            .arg("-rsf")
+                            .arg(
+                                tmp.join("usr/lib64/libnvidia-ml.so")
+                                    .with_added_extension(&*version),
+                            )
+                            .arg(tmp.join("usr/lib64/libnvidia-ml.so.1"))
+                            .invoke(ErrorKind::Filesystem)
+                            .await?;
+                        Command::new("chown")
+                            .arg("-R")
+                            .arg("100000:100000")
+                            .arg(&*tmp)
+                            .invoke(ErrorKind::Filesystem)
+                            .await?;
+                        if let Some(p) = sqfs.parent() {
+                            tokio::fs::create_dir_all(p)
+                                .await
+                                .with_ctx(|_| (ErrorKind::Filesystem, format!("mkdir -p {p:?}")))?;
+                        }
+                        // Build under a scratch name and rename into place so that an
+                        // interrupted run never leaves a truncated image at `sqfs`,
+                        // which the existence check above would otherwise trust forever.
+                        let partial = sqfs.with_added_extension("partial");
+                        Command::new("mksquashfs")
+                            .arg(&*tmp)
+                            .arg(&partial)
+                            .arg("-noappend")
+                            .invoke(ErrorKind::Filesystem)
+                            .await?;
+                        tokio::fs::rename(&partial, &sqfs).await.with_ctx(|_| {
+                            (ErrorKind::Filesystem, format!("mv {partial:?} {sqfs:?}"))
+                        })?;
+                        tmp.unmount_and_delete().await?;
+                    }
+                    BlockDev::new(&sqfs)
+                        .mount(NVIDIA_OVERLAY_PATH, ReadOnly)
+                        .await?;
+
+                    Ok::<_, Error>(())
                 }
+                .await
+                .log_err();
             }
         }
 
diff --git a/core/src/disk/mount/util.rs b/core/src/disk/mount/util.rs
index 1e0e84952..46fc27890 100644
--- a/core/src/disk/mount/util.rs
+++ b/core/src/disk/mount/util.rs
@@ -3,6 +3,7 @@ use std::path::Path;
 use tracing::instrument;
 
 use crate::Error;
+use crate::prelude::*;
 use crate::util::Invoke;
 
 pub async fn is_mountpoint(path: impl AsRef<Path>) -> Result<bool, Error> {
@@ -56,3 +57,42 @@ pub async fn unmount<P: AsRef<Path>>(mountpoint: P, lazy: bool) -> Result<(), Er
     .await?;
     Ok(())
 }
+
+/// Unmounts all mountpoints under (and including) the given path, in reverse
+/// depth order so that nested mounts are unmounted before their parents.
+#[instrument(skip_all)]
+pub async fn unmount_all_under<P: AsRef<Path>>(path: P, lazy: bool) -> Result<(), Error> {
+    let path = path.as_ref();
+    let canonical_path = tokio::fs::canonicalize(path)
+        .await
+        .with_ctx(|_| (ErrorKind::Filesystem, lazy_format!("canonicalize {path:?}")))?;
+
+    let mounts_content = tokio::fs::read_to_string("/proc/mounts")
+        .await
+        .with_ctx(|_| (ErrorKind::Filesystem, "read /proc/mounts"))?;
+
+    // Collect all mountpoints under our path
+    let mut mountpoints: Vec<&str> = mounts_content
+        .lines()
+        .filter_map(|line| {
+            let mountpoint = line.split_whitespace().nth(1)?;
+            // Check if this mountpoint is under our target path
+            let mp_path = Path::new(mountpoint);
+            if mp_path.starts_with(&canonical_path) {
+                Some(mountpoint)
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    // Sort by path length descending so we unmount deepest first
+    mountpoints.sort_by(|a, b| b.len().cmp(&a.len()));
+
+    for mountpoint in mountpoints {
+        tracing::debug!("Unmounting nested mountpoint: {}", mountpoint);
+        unmount(mountpoint, lazy).await?;
+    }
+
+    Ok(())
+}
diff --git a/core/src/service/effects/subcontainer/mod.rs b/core/src/service/effects/subcontainer/mod.rs
index c29e126db..765218ea7 100644
--- a/core/src/service/effects/subcontainer/mod.rs
+++ b/core/src/service/effects/subcontainer/mod.rs
@@ -116,6 +116,8 @@ pub async fn create_subcontainer_fs(
     tracing::info!("Mounting overlay {guid} for {image_id}");
     let subcontainer_wrapper = Subcontainer {
         overlay: OverlayGuard::mount_layers(
+            &[],
+            image,
             if context
                 .seed
                 .persistent_container
@@ -130,8 +132,6 @@ pub async fn create_subcontainer_fs(
             } else {
                 &[]
             },
-            image,
-            &[],
             &mountpoint,
         )
         .await?,
diff --git a/core/src/util/io.rs b/core/src/util/io.rs
index be40bf5a8..fbcd9f840 100644
--- a/core/src/util/io.rs
+++ b/core/src/util/io.rs
@@ -892,6 +892,22 @@ impl TmpDir {
         Ok(())
     }
 
+    /// Consumes the guard after emptying its stored path.
+    ///
+    /// NOTE(review): presumably this stops the `Drop` impl from removing the
+    /// directory, leaving it on disk for the caller — confirm against `Drop`.
+    pub fn leak(mut self) {
+        std::mem::take(&mut self.path);
+    }
+
+    /// Unmounts everything mounted at or below this directory (non-lazily),
+    /// then removes the directory tree.
+    pub async fn unmount_and_delete(self) -> Result<(), Error> {
+        crate::disk::mount::util::unmount_all_under(&self.path, false).await?;
+        tokio::fs::remove_dir_all(&self.path).await?;
+        Ok(())
+    }
+
     pub async fn gc(self: Arc<Self>) -> Result<(), Error> {
         if let Ok(dir) = Arc::try_unwrap(self) {
             dir.delete().await