From ec05f67087255b659b3f10f2eea43f1ff928593b Mon Sep 17 00:00:00 2001 From: Aiden McClelland Date: Tue, 30 Dec 2025 13:21:42 -0700 Subject: [PATCH] implement hardware acceleration more dynamically --- core/src/lxc/gpu_config | 20 ---- core/src/lxc/mod.rs | 115 ++++++++++++++++++++-- core/src/s9pk/v2/compat.rs | 2 +- core/src/s9pk/v2/manifest.rs | 2 +- core/src/service/persistent_container.rs | 2 +- sdk/base/lib/osBindings/Manifest.ts | 1 + sdk/base/lib/types/ManifestTypes.ts | 33 +++++-- sdk/package/lib/manifest/setupManifest.ts | 1 + 8 files changed, 136 insertions(+), 40 deletions(-) delete mode 100644 core/src/lxc/gpu_config diff --git a/core/src/lxc/gpu_config b/core/src/lxc/gpu_config deleted file mode 100644 index 48d4584d0..000000000 --- a/core/src/lxc/gpu_config +++ /dev/null @@ -1,20 +0,0 @@ - -# NVIDIA Devices Spe -lxc.cgroup2.devices.allow = c 235:* rwm - -# DRM devices -lxc.mount.entry = /dev/dri dev/dri none bind,optional,create=di - -# Device IDs for /dev/dri, check `ls -l /dev/dri` -lxc.cgroup2.devices.allow = c 226:* rwm - -# Nvidia nodes -lxc.mount.entry = /dev/nvidia0 dev/nvidia0 none bind,optional,create=file -lxc.mount.entry = /dev/nvidiactl dev/nvidiactl none bind,optional,create=file -lxc.mount.entry = /dev/nvidia-uvm dev/nvidia-uvm none bind,optional,create=file -lxc.mount.entry = /dev/nvidia-modeset dev/nvidia-modeset none bind,optional,create=file - -# Device IDs for /dev/nvidia*, check `ls -l /dev/nvidia*` -lxc.cgroup2.devices.allow = c 195:* rwm -lxc.cgroup2.devices.allow = c 500:* rwm -lxc.cgroup2.devices.allow = c 503:* rwm diff --git a/core/src/lxc/mod.rs b/core/src/lxc/mod.rs index da9068fca..bcecf9a50 100644 --- a/core/src/lxc/mod.rs +++ b/core/src/lxc/mod.rs @@ -5,11 +5,13 @@ use std::sync::{Arc, Weak}; use std::time::Duration; use clap::builder::ValueParserFactory; -use futures::StreamExt; +use futures::future::BoxFuture; +use futures::{FutureExt, StreamExt}; use imbl_value::InternedString; use rpc_toolkit::yajrc::RpcError; use rpc_toolkit::{RpcRequest, RpcResponse}; use serde::{Deserialize, Serialize}; +use tokio::fs::ReadDir; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Command; use tokio::sync::Mutex; @@ -37,6 +39,8 @@ const RPC_DIR: &str = "media/startos/rpc"; // must not be absolute path pub const CONTAINER_RPC_SERVER_SOCKET: &str = "service.sock"; // must not be absolute path pub const HOST_RPC_SERVER_SOCKET: &str = "host.sock"; // must not be absolute path const CONTAINER_DHCP_TIMEOUT: Duration = Duration::from_secs(30); +const HARDWARE_ACCELERATION_PATHS: &[&str] = + &["/dev/dri/", "/dev/nvidia", "/dev/media", "/dev/video"]; #[derive( Clone, Debug, Serialize, Deserialize, Default, PartialEq, Eq, PartialOrd, Ord, Hash, TS, @@ -155,12 +159,91 @@ impl LxcManager { } } +fn handle_devices<'a>( + guid: &'a str, + rootfs: &'a Path, + mut dir: ReadDir, + guards: &'a mut Vec, + matches: &'a [&'a str], +) -> BoxFuture<'a, Result<(), Error>> { + use std::os::linux::fs::MetadataExt; + use std::os::unix::fs::FileTypeExt; + async move { + while let Some(ent) = dir.next_entry().await? { + let path = ent.path(); + if let Some(matches) = if matches.is_empty() { + Some(Vec::new()) + } else { + let mut new_matches = Vec::new(); + for m in matches { + if if m.ends_with("/") { + path.starts_with(m) + } else { + path.to_string_lossy().starts_with(*m) + } || Path::new(*m).starts_with(&path) + { + new_matches.push(*m); + } + } + if new_matches.is_empty() { + None + } else { + Some(new_matches) + } + } { + let meta = ent.metadata().await?; + let ty = meta.file_type(); + if ty.is_dir() { + handle_devices( + guid, + rootfs, + tokio::fs::read_dir(&path) + .await + .with_ctx(|_| (ErrorKind::Filesystem, format!("readdir {path:?}")))?, + guards, + &matches, + ) + .await?; + } else { + let ty = if ty.is_char_device() { + "c" + } else if ty.is_block_device() { + "b" + } else { + continue; + }; + let rdev = meta.st_rdev(); + let maj = ((rdev >> 8) & 0xfff) as u32; + let min = ((rdev & 0xff) | ((rdev >> 12) & 0xfff00)) as u32; + Command::new("lxc-cgroup") + .arg(guid) + .arg("devices.allow") + .arg(format!("{ty} {maj}:{min} rwm")) + .invoke(ErrorKind::Lxc) + .await?; + guards.push( + MountGuard::mount( + &Bind::new(&path), + rootfs.join(path.strip_prefix("/").unwrap_or(&path)), + ReadWrite, + ) + .await?, + ); + } + } + } + Ok(()) + } + .boxed() +} + pub struct LxcContainer { manager: Weak, rootfs: OverlayGuard, pub guid: Arc, rpc_bind: TmpMountGuard, log_mount: Option, + devices: Vec, config: LxcConfig, exited: bool, } @@ -174,10 +257,7 @@ impl LxcContainer { let machine_id = hex::encode(rand::random::<[u8; 16]>()); let container_dir = Path::new(LXC_CONTAINER_DIR).join(&*guid); tokio::fs::create_dir_all(&container_dir).await?; - let mut config_str = format!(include_str!("./config.template"), guid = &*guid); - if config.gpu_acceleration { - config_str += include_str!("./gpu_config"); - } + let config_str = format!(include_str!("./config.template"), guid = &*guid); tokio::fs::write(container_dir.join("config"), config_str).await?; // TODO: append config let rootfs_dir = container_dir.join("rootfs"); @@ -249,11 +329,24 @@ impl LxcContainer { .arg("--name") .arg(&*guid) .arg("-o") - .arg("/tmp/lxc.log") + .arg(format!("/run/startos/LXC_{guid}.log")) .arg("-l") - .arg("debug") + .arg("DEBUG") .invoke(ErrorKind::Lxc) .await?; + let mut devices = Vec::new(); + if config.hardware_acceleration { + handle_devices( + &*guid, + rootfs.path(), + tokio::fs::read_dir("/dev") + .await + .with_ctx(|_| (ErrorKind::Filesystem, "readdir /dev"))?, + &mut devices, + HARDWARE_ACCELERATION_PATHS, + ) + .await?; + } Ok(Self { manager: Arc::downgrade(manager), rootfs, @@ -262,6 +355,7 @@ impl LxcContainer { config, exited: false, log_mount, + devices, }) } @@ -333,7 +427,10 @@ impl LxcContainer { .await?; self.rpc_bind.take().unmount().await?; if let Some(log_mount) = self.log_mount.take() { - log_mount.unmount(true).await?; + log_mount.unmount(false).await?; + } + for device in std::mem::take(&mut self.devices) { + device.unmount(false).await?; } self.rootfs.take().unmount(true).await?; let rootfs_path = self.rootfs_dir(); @@ -419,7 +516,7 @@ impl Drop for LxcContainer { #[derive(Default, Serialize)] pub struct LxcConfig { - pub gpu_acceleration: bool, + pub hardware_acceleration: bool, } pub async fn connect(ctx: &RpcContext, container: &LxcContainer) -> Result { diff --git a/core/src/s9pk/v2/compat.rs b/core/src/s9pk/v2/compat.rs index 1439968ac..2d31bfcd2 100644 --- a/core/src/s9pk/v2/compat.rs +++ b/core/src/s9pk/v2/compat.rs @@ -251,7 +251,7 @@ impl TryFrom for Manifest { git_hash: value.git_hash, os_version: value.eos_version, sdk_version: None, - gpu_acceleration: match value.main { + hardware_acceleration: match value.main { PackageProcedure::Docker(d) => d.gpu_acceleration, PackageProcedure::Script(_) => false, }, diff --git a/core/src/s9pk/v2/manifest.rs b/core/src/s9pk/v2/manifest.rs index 1037773be..48013f28a 100644 --- a/core/src/s9pk/v2/manifest.rs +++ b/core/src/s9pk/v2/manifest.rs @@ -63,7 +63,7 @@ pub struct Manifest { #[serde(default)] pub hardware_requirements: HardwareRequirements, #[serde(default)] - pub gpu_acceleration: bool, + pub hardware_acceleration: bool, pub git_hash: Option, #[serde(default = "current_version")] #[ts(type = "string")] diff --git a/core/src/service/persistent_container.rs b/core/src/service/persistent_container.rs index 8c8131b48..e71b5fd5b 100644 --- a/core/src/service/persistent_container.rs +++ b/core/src/service/persistent_container.rs @@ -97,7 +97,7 @@ impl PersistentContainer { .join(&s9pk.as_manifest().id), ), LxcConfig { - gpu_acceleration: s9pk.manifest.gpu_acceleration, + hardware_acceleration: s9pk.manifest.hardware_acceleration, }, ) .await?; diff --git a/sdk/base/lib/osBindings/Manifest.ts b/sdk/base/lib/osBindings/Manifest.ts index 8e44e2056..1d0dbfe0a 100644 --- a/sdk/base/lib/osBindings/Manifest.ts +++ b/sdk/base/lib/osBindings/Manifest.ts @@ -31,6 +31,7 @@ export type Manifest = { alerts: Alerts dependencies: Dependencies hardwareRequirements: HardwareRequirements + hardwareAcceleration: boolean gitHash: GitHash | null osVersion: string sdkVersion: string | null diff --git a/sdk/base/lib/types/ManifestTypes.ts b/sdk/base/lib/types/ManifestTypes.ts index 6b4e47da7..e498ee06a 100644 --- a/sdk/base/lib/types/ManifestTypes.ts +++ b/sdk/base/lib/types/ManifestTypes.ts @@ -128,16 +128,17 @@ export type SDKManifest = { /** * @description (optional) A set of hardware requirements for this service. If the user's machine * does not meet these requirements, they will not be able to install this service. - * @property {object[]} devices - TODO Aiden confirm type on the left. List of required devices (displays or processors). + * @property {object[]} devices - List of required devices (display or processor). + * `pattern` refers to a regular expression that at least one device of the specified class must match + * `patternDescription` is what will be displayed to the user about what kind of device is required * @property {number} ram - Minimum RAM requirement (in megabytes MB) * @property {string[]} arch - List of supported arches * @example * ``` - TODO Aiden verify below and provide examples for devices hardwareRequirements: { devices: [ - { class: 'display', value: '' }, - { class: 'processor', value: '' }, + { class: 'display', pattern: 'CometLake', patternDescription: 'A CometLake (10th generation) Intel Integrated GPU' }, + { class: 'processor', pattern: 'i[3579]-10[0-9]{3}U CPU', patternDescription: 'A 10th Generation Intel i-Series processor' }, ], ram: 8192, arch: ['x86-64'], @@ -149,14 +150,30 @@ export type SDKManifest = { readonly ram?: number | null readonly arch?: string[] | null } + + /** + * @description Enable access to hardware acceleration devices (such as /dev/dri, or /dev/nvidia*) + */ + readonly hardwareAcceleration?: boolean } // this is hacky but idk a more elegant way type ArchOptions = { - 0: ["x86_64", "aarch64"] - 1: ["aarch64", "x86_64"] - 2: ["x86_64"] - 3: ["aarch64"] + 0: ["x86_64", "aarch64", "riscv64"] + 1: ["aarch64", "x86_64", "riscv64"] + 2: ["x86_64", "riscv64", "aarch64"] + 3: ["aarch64", "riscv64", "x86_64"] + 4: ["riscv64", "x86_64", "aarch64"] + 5: ["riscv64", "aarch64", "x86_64"] + 6: ["x86_64", "aarch64"] + 7: ["aarch64", "x86_64"] + 8: ["x86_64", "riscv64"] + 9: ["aarch64", "riscv64"] + 10: ["riscv64", "aarch64"] + 11: ["riscv64", "x86_64"] + 12: ["x86_64"] + 13: ["aarch64"] + 14: ["riscv64"] } export type SDKImageInputSpec = { [A in keyof ArchOptions]: { diff --git a/sdk/package/lib/manifest/setupManifest.ts b/sdk/package/lib/manifest/setupManifest.ts index 9b886e16e..2079782c3 100644 --- a/sdk/package/lib/manifest/setupManifest.ts +++ b/sdk/package/lib/manifest/setupManifest.ts @@ -90,5 +90,6 @@ export function buildManifest< ) : manifest.hardwareRequirements?.arch, }, + hardwareAcceleration: manifest.hardwareAcceleration ?? false, } }