This commit is contained in:
Aiden McClelland
2026-01-07 22:29:08 -07:00
parent 56b7aa07d6
commit f135259c63
9 changed files with 171 additions and 61 deletions

View File

@@ -1,6 +0,0 @@
install:
dkms
linux-headers-$(uname-r)
then run:
https://us.download.nvidia.com/XFree86/aarch64/580.119.02/NVIDIA-Linux-aarch64-580.119.02.run

View File

@@ -16,6 +16,7 @@ dnsutils
dosfstools
e2fsprogs
ecryptfs-utils
equivs
exfatprogs
flashrom
fuse3

View File

@@ -203,16 +203,14 @@ cat > config/hooks/normal/9000-install-startos.hook.chroot << EOF
set -e
if [ "${NON_FREE}" = "1" ]; then
# install latest NVIDIA driver in a given major series
# install a specific NVIDIA driver version
# ---------------- configuration ----------------
# Major driver series, e.g. 550, 560, 570
NVIDIA_MAJOR="\${NVIDIA_MAJOR:-580}"
NVIDIA_DRIVER_VERSION="\${NVIDIA_DRIVER_VERSION:-580.119.02}"
BASE_URL="https://download.nvidia.com/XFree86/Linux-${QEMU_ARCH}"
echo "[nvidia-hook] Using NVIDIA major series: \${NVIDIA_MAJOR}" >&2
echo "[nvidia-hook] Using NVIDIA driver: \${NVIDIA_DRIVER_VERSION}" >&2
# ---------------- kernel version ----------------
@@ -231,36 +229,26 @@ if [ "${NON_FREE}" = "1" ]; then
echo "[nvidia-hook] Target kernel version: \${KVER}" >&2
# Ensure kernel headers are present
TEMP_APT_DEPS=(build-essential)
if [ ! -e "/lib/modules/\${KVER}/build" ]; then
apt-get install linux-headers-\${KVER}
echo "[nvidia-hook] ERROR: /lib/modules/\${KVER}/build missing; install headers for \${KVER} before this hook." >&2
exit 1
TEMP_APT_DEPS+=(linux-headers-\${KVER})
fi
# ---------------- find latest driver in major series ----------------
echo "[nvidia-hook] Installing build dependencies" >&2
echo "[nvidia-hook] Fetching version list from \${BASE_URL}/" >&2
NVIDIA_VER="\$(
curl -fsSL "\${BASE_URL}/" \
| sed -n 's/.*href="([0-9][0-9][0-9][0-9.]*)/".*/\u0001/p' \
| grep -E "^\${NVIDIA_MAJOR}." \
| sort -V \
| tail -n1
)"
if [ -z "\${NVIDIA_VER}" ]; then
echo "[nvidia-hook] ERROR: could not find NVIDIA series \${NVIDIA_MAJOR} under \${BASE_URL}/" >&2
exit 1
fi
echo "[nvidia-hook] Selected NVIDIA driver version: \${NVIDIA_VER}" >&2
/usr/lib/startos/scripts/install-equivs <<-EOF
Package: nvidia-depends
Version: \${NVIDIA_DRIVER_VERSION}
Section: unknown
Priority: optional
Depends: \${dep_list="\$(IFS=', '; echo "\${TEMP_APT_DEPS[*]}")"}
EOF
# ---------------- download and run installer ----------------
RUN_NAME="NVIDIA-Linux-${QEMU_ARCH}-\${NVIDIA_VER}.run"
RUN_NAME="NVIDIA-Linux-${QEMU_ARCH}-\${NVIDIA_DRIVER_VERSION}.run"
RUN_PATH="/root/\${RUN_NAME}"
RUN_URL="\${BASE_URL}/\${NVIDIA_VER}/\${RUN_NAME}"
RUN_URL="\${BASE_URL}/\${NVIDIA_DRIVER_VERSION}/\${RUN_NAME}"
echo "[nvidia-hook] Downloading \${RUN_URL}" >&2
wget -O "\${RUN_PATH}" "\${RUN_URL}"
@@ -279,7 +267,12 @@ if [ "${NON_FREE}" = "1" ]; then
echo "[nvidia-hook] Running depmod for \${KVER}" >&2
depmod -a "\${KVER}"
echo "[nvidia-hook] NVIDIA \${NVIDIA_VER} installation complete for kernel \${KVER}" >&2
echo "[nvidia-hook] NVIDIA \${NVIDIA_DRIVER_VERSION} installation complete for kernel \${KVER}" >&2
echo "[nvidia-hook] Removing build dependencies..." >&2
apt-get purge -y nvidia-depends
apt-get autoremove -y
echo "[nvidia-hook] Removed build dependencies." >&2
fi
cp /etc/resolv.conf /etc/resolv.conf.bak

View File

@@ -0,0 +1,20 @@
#!/bin/bash
export DEBIAN_FRONTEND=noninteractive
export DEBCONF_NONINTERACTIVE_SEEN=true
TMP_DIR=$(mktemp -d)
(
set -e
cd $TMP_DIR
cat > control.equivs
equivs-build control.equivs
apt-get install -y ./*.deb < /dev/null
)
rm -rf $TMP_DIR
echo Install complete. >&2
exit 0

View File

@@ -15,4 +15,7 @@ case $ARCH in
DOCKER_PLATFORM=linux/arm64;;
esac
docker run --rm $USE_TTY --platform=$DOCKER_PLATFORM -eARCH --privileged -v "$(pwd):/root/start-os" start9/build-env /root/start-os/container-runtime/update-image.sh
docker run --rm $USE_TTY --platform=$DOCKER_PLATFORM -eARCH --privileged -v "$(pwd):/root/start-os" start9/build-env /root/start-os/container-runtime/update-image.sh
if [ "$(ls -nd "rootfs.${ARCH}.squashfs" | awk '{ print $3 }')" != "$UID" ]; then
docker run --rm $USE_TTY -v "$(pwd):/root/start-os" start9/build-env chown -R $UID:$UID /root/start-os/container-runtime
fi

View File

@@ -27,8 +27,10 @@ use crate::context::config::ServerConfig;
use crate::db::model::Database;
use crate::db::model::package::TaskSeverity;
use crate::disk::OsPartitionInfo;
use crate::disk::mount::filesystem::ReadOnly;
use crate::disk::mount::filesystem::bind::Bind;
use crate::disk::mount::filesystem::block_dev::BlockDev;
use crate::disk::mount::filesystem::loop_dev::LoopDev;
use crate::disk::mount::filesystem::{FileSystem, ReadOnly};
use crate::disk::mount::guard::MountGuard;
use crate::init::{InitResult, check_time_is_synchronized};
use crate::install::PKG_ARCHIVE_DIR;
@@ -49,7 +51,7 @@ use crate::service::effects::subcontainer::NVIDIA_OVERLAY_PATH;
use crate::shutdown::Shutdown;
use crate::util::Invoke;
use crate::util::future::NonDetachingJoinHandle;
use crate::util::io::delete_file;
use crate::util::io::{TmpDir, delete_file};
use crate::util::lshw::LshwDevice;
use crate::util::sync::{SyncMutex, SyncRwLock, Watch};
use crate::{ActionId, DATA_DIR, PLATFORM, PackageId};
@@ -174,35 +176,82 @@ impl RpcContext {
tracing::info!("Initialized Net Controller");
if PLATFORM.ends_with("-nonfree") {
if let Err(e) = Command::new("nvidia-modprobe")
if let Err(e) = Command::new("nvidia-smi")
.invoke(ErrorKind::ParseSysInfo)
.await
{
tracing::warn!("nvidia-modprobe: {e}");
tracing::info!("The above warning can be ignored if no NVIDIA card is present");
} else {
if let Some(procfs) = MountGuard::mount(
&Bind::new("/proc"),
Path::new(NVIDIA_OVERLAY_PATH).join("proc"),
ReadOnly,
)
.await
.log_err()
{
Command::new("nvidia-container-cli")
.arg("configure")
.arg("--no-devbind")
.arg("--no-cgroups")
.arg("--utility")
.arg("--compute")
.arg("--graphics")
.arg("--video")
.arg(NVIDIA_OVERLAY_PATH)
.invoke(ErrorKind::Unknown)
.await
.log_err();
procfs.unmount(true).await.log_err();
async {
let version: InternedString = String::from_utf8(
Command::new("modinfo")
.arg("-F")
.arg("version")
.arg("nvidia")
.invoke(ErrorKind::ParseSysInfo)
.await?,
)?
.trim()
.into();
let sqfs = Path::new("/media/startos/data/package-data/nvidia")
.join(&*version)
.join("container-overlay.squashfs");
if tokio::fs::metadata(&sqfs).await.is_err() {
let tmp = TmpDir::new().await?;
let procfs = MountGuard::mount(
&Bind::new("/proc"),
Path::new(&*tmp).join("proc"),
ReadOnly,
)
.await?;
Command::new("nvidia-container-cli")
.arg("configure")
.arg("--no-devbind")
.arg("--no-cgroups")
.arg("--utility")
.arg("--compute")
.arg("--graphics")
.arg("--video")
.arg(&*tmp)
.invoke(ErrorKind::Unknown)
.await?;
procfs.unmount(true).await?;
Command::new("ln")
.arg("-rsf")
.arg(
tmp.join("usr/lib64/libnvidia-ml.so")
.with_added_extension(&*version),
)
.arg(tmp.join("usr/lib64/libnvidia-ml.so.1"))
.invoke(ErrorKind::Filesystem)
.await?;
Command::new("chown")
.arg("-R")
.arg("100000:100000")
.arg(&*tmp)
.invoke(ErrorKind::Filesystem)
.await?;
if let Some(p) = sqfs.parent() {
tokio::fs::create_dir_all(p)
.await
.with_ctx(|_| (ErrorKind::Filesystem, format!("mkdir -p {p:?}")))?;
}
Command::new("mksquashfs")
.arg(&*tmp)
.arg(&sqfs)
.invoke(ErrorKind::Filesystem)
.await?;
tmp.unmount_and_delete().await?;
}
BlockDev::new(&sqfs)
.mount(NVIDIA_OVERLAY_PATH, ReadOnly)
.await?;
Ok::<_, Error>(())
}
.await
.log_err();
}
}

View File

@@ -3,6 +3,7 @@ use std::path::Path;
use tracing::instrument;
use crate::Error;
use crate::prelude::*;
use crate::util::Invoke;
pub async fn is_mountpoint(path: impl AsRef<Path>) -> Result<bool, Error> {
@@ -56,3 +57,42 @@ pub async fn unmount<P: AsRef<Path>>(mountpoint: P, lazy: bool) -> Result<(), Er
.await?;
Ok(())
}
/// Unmounts all mountpoints under (and including) the given path, in reverse
/// depth order so that nested mounts are unmounted before their parents.
#[instrument(skip_all)]
pub async fn unmount_all_under<P: AsRef<Path>>(path: P, lazy: bool) -> Result<(), Error> {
let path = path.as_ref();
let canonical_path = tokio::fs::canonicalize(path)
.await
.with_ctx(|_| (ErrorKind::Filesystem, lazy_format!("canonicalize {path:?}")))?;
let mounts_content = tokio::fs::read_to_string("/proc/mounts")
.await
.with_ctx(|_| (ErrorKind::Filesystem, "read /proc/mounts"))?;
// Collect all mountpoints under our path
let mut mountpoints: Vec<&str> = mounts_content
.lines()
.filter_map(|line| {
let mountpoint = line.split_whitespace().nth(1)?;
// Check if this mountpoint is under our target path
let mp_path = Path::new(mountpoint);
if mp_path.starts_with(&canonical_path) {
Some(mountpoint)
} else {
None
}
})
.collect();
// Sort by path length descending so we unmount deepest first
mountpoints.sort_by(|a, b| b.len().cmp(&a.len()));
for mountpoint in mountpoints {
tracing::debug!("Unmounting nested mountpoint: {}", mountpoint);
unmount(mountpoint, lazy).await?;
}
Ok(())
}

View File

@@ -116,6 +116,8 @@ pub async fn create_subcontainer_fs(
tracing::info!("Mounting overlay {guid} for {image_id}");
let subcontainer_wrapper = Subcontainer {
overlay: OverlayGuard::mount_layers(
&[],
image,
if context
.seed
.persistent_container
@@ -130,8 +132,6 @@ pub async fn create_subcontainer_fs(
} else {
&[]
},
image,
&[],
&mountpoint,
)
.await?,

View File

@@ -892,6 +892,16 @@ impl TmpDir {
Ok(())
}
pub fn leak(mut self) {
std::mem::take(&mut self.path);
}
pub async fn unmount_and_delete(self) -> Result<(), Error> {
crate::disk::mount::util::unmount_all_under(&self.path, false).await?;
tokio::fs::remove_dir_all(&self.path).await?;
Ok(())
}
pub async fn gc(self: Arc<Self>) -> Result<(), Error> {
if let Ok(dir) = Arc::try_unwrap(self) {
dir.delete().await