hardware acceleration and support for NVIDIA cards on nonfree images (#3089)

* add nvidia packages

* add nvidia deps to nonfree

* gpu_acceleration flag & nvidia hacking

* fix gpu_config & /tmp/lxc.log

* implement hardware acceleration more dynamically

* refactor OpenUI

* use mknod

* registry updates for multi-hardware-requirements

* pluralize

* handle new registry types

* remove log

* migrations and driver fixes

* wip

* misc patches

* handle nvidia-container differently

* chore: comments (#3093)

* chore: comments

* revert some sizing

---------

Co-authored-by: Matt Hill <mattnine@protonmail.com>

* Revert "handle nvidia-container differently"

This reverts commit d708ae53df.

* fix debian containers

* cleanup

* feat: add empty array placeholder in forms (#3095)

* fixes from testing, client side device filtering for better fingerprinting resistance

* fix mac builds

---------

Co-authored-by: Sam Sartor <me@samsartor.com>
Co-authored-by: Matt Hill <mattnine@protonmail.com>
Co-authored-by: Alex Inkin <alexander@inkin.ru>
This commit is contained in:
Aiden McClelland
2026-01-15 11:42:17 -08:00
committed by GitHub
parent e8ef39adad
commit 99871805bd
95 changed files with 2758 additions and 1092 deletions

View File

@@ -5,11 +5,13 @@ use std::sync::{Arc, Weak};
use std::time::Duration;
use clap::builder::ValueParserFactory;
use futures::StreamExt;
use futures::future::BoxFuture;
use futures::{FutureExt, StreamExt};
use imbl_value::InternedString;
use rpc_toolkit::yajrc::RpcError;
use rpc_toolkit::{RpcRequest, RpcResponse};
use serde::{Deserialize, Serialize};
use tokio::fs::ReadDir;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tokio::sync::Mutex;
@@ -27,7 +29,7 @@ use crate::disk::mount::util::unmount;
use crate::prelude::*;
use crate::rpc_continuations::{Guid, RpcContinuation};
use crate::service::ServiceStats;
use crate::util::io::open_file;
use crate::util::io::{open_file, write_file_owned_atomic};
use crate::util::rpc_client::UnixRpcClient;
use crate::util::{FromStrParser, Invoke, new_guid};
use crate::{InvalidId, PackageId};
@@ -37,6 +39,7 @@ const RPC_DIR: &str = "media/startos/rpc"; // must not be absolute path
pub const CONTAINER_RPC_SERVER_SOCKET: &str = "service.sock"; // must not be absolute path
pub const HOST_RPC_SERVER_SOCKET: &str = "host.sock"; // must not be absolute path
const CONTAINER_DHCP_TIMEOUT: Duration = Duration::from_secs(30);
const HARDWARE_ACCELERATION_PATHS: &[&str] = &["/dev/dri", "/dev/nvidia*", "/dev/kfd"];
#[derive(
Clone, Debug, Serialize, Deserialize, Default, PartialEq, Eq, PartialOrd, Ord, Hash, TS,
@@ -174,12 +177,8 @@ impl LxcContainer {
let machine_id = hex::encode(rand::random::<[u8; 16]>());
let container_dir = Path::new(LXC_CONTAINER_DIR).join(&*guid);
tokio::fs::create_dir_all(&container_dir).await?;
tokio::fs::write(
container_dir.join("config"),
format!(include_str!("./config.template"), guid = &*guid),
)
.await?;
// TODO: append config
let config_str = format!(include_str!("./config.template"), guid = &*guid);
tokio::fs::write(container_dir.join("config"), config_str).await?;
let rootfs_dir = container_dir.join("rootfs");
let rootfs = OverlayGuard::mount(
TmpMountGuard::mount(
@@ -197,8 +196,25 @@ impl LxcContainer {
&rootfs_dir,
)
.await?;
tokio::fs::write(rootfs_dir.join("etc/machine-id"), format!("{machine_id}\n")).await?;
tokio::fs::write(rootfs_dir.join("etc/hostname"), format!("{guid}\n")).await?;
Command::new("chown")
.arg("100000:100000")
.arg(&rootfs_dir)
.invoke(ErrorKind::Filesystem)
.await?;
write_file_owned_atomic(
rootfs_dir.join("etc/machine-id"),
format!("{machine_id}\n"),
100000,
100000,
)
.await?;
write_file_owned_atomic(
rootfs_dir.join("etc/hostname"),
format!("{guid}\n"),
100000,
100000,
)
.await?;
Command::new("sed")
.arg("-i")
.arg(format!("s/LXC_NAME/{guid}/g"))
@@ -248,9 +264,13 @@ impl LxcContainer {
.arg("-d")
.arg("--name")
.arg(&*guid)
.arg("-o")
.arg(format!("/run/startos/LXC_{guid}.log"))
.arg("-l")
.arg("DEBUG")
.invoke(ErrorKind::Lxc)
.await?;
Ok(Self {
let res = Self {
manager: Arc::downgrade(manager),
rootfs,
guid: Arc::new(ContainerId::try_from(&*guid)?),
@@ -258,7 +278,84 @@ impl LxcContainer {
config,
exited: false,
log_mount,
})
};
if res.config.hardware_acceleration {
res.handle_devices(
tokio::fs::read_dir("/dev")
.await
.with_ctx(|_| (ErrorKind::Filesystem, "readdir /dev"))?,
HARDWARE_ACCELERATION_PATHS,
)
.await?;
}
Ok(res)
}
#[cfg(not(target_os = "linux"))]
/// No-op counterpart of the Linux device walker.
///
/// On non-Linux targets no device nodes are created: the directory handle and
/// the match patterns are accepted only so call sites compile unchanged, and
/// the call always succeeds.
async fn handle_devices(&self, _dir: ReadDir, _patterns: &[&str]) -> Result<(), Error> {
    Ok(())
}
#[cfg(target_os = "linux")]
/// Recursively walks `dir` and hands every matching character or block device
/// to [`Self::mknod`].
///
/// An entry is kept for a pattern when any of the following holds:
/// * the pattern ends in `*` and the entry's path, as a string, starts with
///   the text before the `*`;
/// * the pattern has no `*` and the entry's path is at or below the pattern
///   (component-wise comparison);
/// * the entry's path is an ancestor of the pattern, so the walk can descend
///   towards it.
///
/// An empty `matches` slice keeps every entry. Directories are descended into
/// with only the patterns that kept them; entries that are neither
/// directories nor character/block devices are ignored.
///
/// The future is boxed because the function calls itself for subdirectories.
///
/// # Errors
/// Propagates failures from reading directory entries or their metadata, from
/// opening a subdirectory, and from `mknod`.
fn handle_devices<'a>(
    &'a self,
    mut dir: ReadDir,
    matches: &'a [&'a str],
) -> BoxFuture<'a, Result<(), Error>> {
    use std::os::linux::fs::MetadataExt;
    use std::os::unix::fs::FileTypeExt;
    async move {
        while let Some(ent) = dir.next_entry().await? {
            let path = ent.path();
            let sub_matches = if matches.is_empty() {
                Vec::new()
            } else {
                let path_str = path.to_string_lossy();
                let mut kept = Vec::new();
                for pattern in matches.iter().copied() {
                    let could_match = match pattern.strip_suffix('*') {
                        Some(prefix) => {
                            path_str.starts_with(prefix) || Path::new(prefix).starts_with(&path)
                        }
                        None => path.starts_with(pattern) || Path::new(pattern).starts_with(&path),
                    };
                    if could_match {
                        // Keep the pattern exactly as given (including any
                        // trailing `*`). Passing down the stripped prefix
                        // would turn a glob into a component-wise match one
                        // level deeper, so children of a glob-matched
                        // directory (e.g. `/dev/nvidia-caps/*` for
                        // `/dev/nvidia*`) would never match.
                        kept.push(pattern);
                    }
                }
                if kept.is_empty() {
                    continue;
                }
                kept
            };
            let meta = ent.metadata().await?;
            let ty = meta.file_type();
            if ty.is_dir() {
                self.handle_devices(
                    tokio::fs::read_dir(&path).await.with_ctx(|_| {
                        (ErrorKind::Filesystem, format!("readdir {path:?}"))
                    })?,
                    &sub_matches,
                )
                .await?;
            } else {
                let ty = if ty.is_char_device() {
                    'c'
                } else if ty.is_block_device() {
                    'b'
                } else {
                    continue;
                };
                // Split the device number: the major is taken from bits
                // 8..20, the minor from bits 0..8 plus bits 20..32.
                let rdev = meta.st_rdev();
                let major = ((rdev >> 8) & 0xfff) as u32;
                let minor = ((rdev & 0xff) | ((rdev >> 12) & 0xfff00)) as u32;
                self.mknod(&path, ty, major, minor).await?;
            }
        }
        Ok(())
    }
    .boxed()
}
pub fn rootfs_dir(&self) -> &Path {
@@ -329,7 +426,7 @@ impl LxcContainer {
.await?;
self.rpc_bind.take().unmount().await?;
if let Some(log_mount) = self.log_mount.take() {
log_mount.unmount(true).await?;
log_mount.unmount(false).await?;
}
self.rootfs.take().unmount(true).await?;
let rootfs_path = self.rootfs_dir();
@@ -351,7 +448,10 @@ impl LxcContainer {
.invoke(ErrorKind::Lxc)
.await?;
self.exited = true;
#[allow(unused_assignments)]
{
self.exited = true;
}
Ok(())
}
@@ -361,6 +461,17 @@ impl LxcContainer {
let sock_path = self.rpc_dir().join(CONTAINER_RPC_SERVER_SOCKET);
while tokio::fs::metadata(&sock_path).await.is_err() {
if timeout.map_or(false, |t| started.elapsed() > t) {
tracing::error!(
"{:?}",
Command::new("lxc-attach")
.arg(&**self.guid)
.arg("--")
.arg("systemctl")
.arg("status")
.arg("container-runtime")
.invoke(ErrorKind::Unknown)
.await
);
return Err(Error::new(
eyre!("timed out waiting for socket"),
ErrorKind::Timeout,
@@ -371,6 +482,88 @@ impl LxcContainer {
tracing::info!("Connected to socket in {:?}", started.elapsed());
Ok(UnixRpcClient::new(sock_path))
}
/// Creates a device node for the host device `path` so it is usable inside
/// this container.
///
/// * `path` - absolute path of the device on the host.
/// * `ty` - node type passed to `mknod` (callers in this file pass `'c'` or `'b'`).
/// * `major` / `minor` - device numbers passed to `mknod`.
///
/// For paths under `/dev`, the node is created under `media/startos/dev` in
/// the container rootfs, chowned, and then bind-mounted onto `path` inside
/// the running container via `lxc-attach`. Any other path gets its node
/// created directly at the corresponding location in the rootfs. In both
/// cases nothing is done if the node's target path already exists.
///
/// # Errors
/// Returns an error if creating directories fails or any of the invoked
/// commands (`chown`, `mknod`, `lxc-attach`) fails.
pub async fn mknod(&self, path: &Path, ty: char, major: u32, minor: u32) -> Result<(), Error> {
    // Owner applied to everything created here; presumably the host id that
    // the container's root user maps to — confirm against the LXC idmap.
    const NODE_OWNER: &str = "100000:100000";

    // Builds `chown 100000:100000`; the caller appends the target.
    let chown = || {
        let mut cmd = Command::new("chown");
        cmd.arg(NODE_OWNER);
        cmd
    };
    // Builds `lxc-attach <guid> --`; the caller appends the inner command.
    let attach = || {
        let mut cmd = Command::new("lxc-attach");
        cmd.arg(&**self.guid).arg("--");
        cmd
    };

    match path.strip_prefix("/dev") {
        Ok(dev_rel) => {
            let staging_root = self.rootfs_dir().join("media/startos/dev");
            let node_path = staging_root.join(dev_rel);
            if tokio::fs::metadata(&node_path).await.is_ok() {
                return Ok(());
            }
            let parent = dev_rel.parent();

            // Prepare the staging directory tree and hand it to the owner.
            if let Some(parent) = parent {
                let staged_parent = staging_root.join(parent);
                tokio::fs::create_dir_all(&staged_parent)
                    .await
                    .with_ctx(|_| {
                        (ErrorKind::Filesystem, format!("mkdir -p {staged_parent:?}"))
                    })?;
                for ancestor in parent.ancestors() {
                    chown()
                        .arg(staging_root.join(ancestor))
                        .invoke(ErrorKind::Filesystem)
                        .await?;
                }
            }

            // Create the node in the staging area.
            Command::new("mknod")
                .arg(&node_path)
                .arg(&*InternedString::from_display(&ty))
                .arg(&*InternedString::from_display(&major))
                .arg(&*InternedString::from_display(&minor))
                .invoke(ErrorKind::Filesystem)
                .await?;
            chown()
                .arg(&node_path)
                .invoke(ErrorKind::Filesystem)
                .await?;

            // Inside the container: make sure the mount point exists, then
            // bind the staged node onto it.
            if let Some(parent) = parent {
                attach()
                    .arg("mkdir")
                    .arg("-p")
                    .arg(Path::new("/dev").join(parent))
                    .invoke(ErrorKind::Lxc)
                    .await?;
            }
            attach()
                .arg("touch")
                .arg(path)
                .invoke(ErrorKind::Lxc)
                .await?;
            attach()
                .arg("mount")
                .arg("--bind")
                .arg(Path::new("/media/startos/dev").join(dev_rel))
                .arg(path)
                .invoke(ErrorKind::Lxc)
                .await?;
        }
        Err(_) => {
            // Not under /dev: create the node at the same location in the rootfs.
            let node_path = self
                .rootfs_dir()
                .join(path.strip_prefix("/").unwrap_or(path));
            if tokio::fs::metadata(&node_path).await.is_ok() {
                return Ok(());
            }
            Command::new("mknod")
                .arg(&node_path)
                .arg(&*InternedString::from_display(&ty))
                .arg(&*InternedString::from_display(&major))
                .arg(&*InternedString::from_display(&minor))
                .invoke(ErrorKind::Filesystem)
                .await?;
            chown()
                .arg(&node_path)
                .invoke(ErrorKind::Filesystem)
                .await?;
        }
    }
    Ok(())
}
}
impl Drop for LxcContainer {
fn drop(&mut self) {
@@ -414,7 +607,10 @@ impl Drop for LxcContainer {
}
#[derive(Default, Serialize)]
pub struct LxcConfig {}
pub struct LxcConfig {
pub hardware_acceleration: bool,
}
pub async fn connect(ctx: &RpcContext, container: &LxcContainer) -> Result<Guid, Error> {
use axum::extract::ws::Message;