implement hardware acceleration more dynamically

This commit is contained in:
Aiden McClelland
2025-12-30 13:21:42 -07:00
parent 65e0a64c24
commit ec05f67087
8 changed files with 136 additions and 40 deletions

View File

@@ -1,20 +0,0 @@
# NVIDIA Devices Spe
lxc.cgroup2.devices.allow = c 235:* rwm
# DRM devices
lxc.mount.entry = /dev/dri dev/dri none bind,optional,create=dir
# Device IDs for /dev/dri, check `ls -l /dev/dri`
lxc.cgroup2.devices.allow = c 226:* rwm
# Nvidia nodes
lxc.mount.entry = /dev/nvidia0 dev/nvidia0 none bind,optional,create=file
lxc.mount.entry = /dev/nvidiactl dev/nvidiactl none bind,optional,create=file
lxc.mount.entry = /dev/nvidia-uvm dev/nvidia-uvm none bind,optional,create=file
lxc.mount.entry = /dev/nvidia-modeset dev/nvidia-modeset none bind,optional,create=file
# Device IDs for /dev/nvidia*, check `ls -l /dev/nvidia*`
lxc.cgroup2.devices.allow = c 195:* rwm
lxc.cgroup2.devices.allow = c 500:* rwm
lxc.cgroup2.devices.allow = c 503:* rwm

View File

@@ -5,11 +5,13 @@ use std::sync::{Arc, Weak};
use std::time::Duration;
use clap::builder::ValueParserFactory;
use futures::StreamExt;
use futures::future::BoxFuture;
use futures::{FutureExt, StreamExt};
use imbl_value::InternedString;
use rpc_toolkit::yajrc::RpcError;
use rpc_toolkit::{RpcRequest, RpcResponse};
use serde::{Deserialize, Serialize};
use tokio::fs::ReadDir;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tokio::sync::Mutex;
@@ -37,6 +39,8 @@ const RPC_DIR: &str = "media/startos/rpc"; // must not be absolute path
pub const CONTAINER_RPC_SERVER_SOCKET: &str = "service.sock"; // must not be absolute path
pub const HOST_RPC_SERVER_SOCKET: &str = "host.sock"; // must not be absolute path
const CONTAINER_DHCP_TIMEOUT: Duration = Duration::from_secs(30);
const HARDWARE_ACCELERATION_PATHS: &[&str] =
&["/dev/dri/", "/dev/nvidia", "/dev/media", "/dev/video"];
#[derive(
Clone, Debug, Serialize, Deserialize, Default, PartialEq, Eq, PartialOrd, Ord, Hash, TS,
@@ -155,12 +159,91 @@ impl LxcManager {
}
}
/// Recursively walks `dir` and exposes every matching device node to the
/// container identified by `guid`.
///
/// For each directory entry whose path is selected by `matches`:
/// * directories are descended into, carrying only the patterns that
///   selected them;
/// * character and block devices are allowed through
///   `lxc-cgroup <guid> devices.allow "<c|b> <major>:<minor> rwm"` and then
///   bind-mounted read-write at the same path underneath `rootfs`, with the
///   resulting guard appended to `guards`;
/// * every other file type is ignored.
///
/// Pattern semantics: a pattern ending in `/` is compared component-wise
/// against the entry path, any other pattern is a plain string prefix of the
/// entry path. An entry is also kept when it is an ancestor of a pattern, so
/// the walk can reach patterns that live in subdirectories. An empty
/// `matches` slice selects every entry.
///
/// The future is boxed because the function awaits itself recursively.
///
/// # Errors
/// Propagates the first failure from reading the directory, fetching entry
/// metadata, running `lxc-cgroup`, or creating the bind mount.
fn handle_devices<'a>(
    guid: &'a str,
    rootfs: &'a Path,
    mut dir: ReadDir,
    guards: &'a mut Vec<MountGuard>,
    matches: &'a [&'a str],
) -> BoxFuture<'a, Result<(), Error>> {
    use std::os::linux::fs::MetadataExt;
    use std::os::unix::fs::FileTypeExt;
    async move {
        while let Some(entry) = dir.next_entry().await? {
            let path = entry.path();

            // Keep only the patterns that still apply to this entry.
            let narrowed: Vec<&str> = matches
                .iter()
                .copied()
                .filter(|pat| {
                    let selected = if pat.ends_with("/") {
                        path.starts_with(pat)
                    } else {
                        path.to_string_lossy().starts_with(*pat)
                    };
                    selected || Path::new(*pat).starts_with(&path)
                })
                .collect();
            // An empty pattern list means "accept everything"; otherwise at
            // least one pattern has to survive the filter.
            if !matches.is_empty() && narrowed.is_empty() {
                continue;
            }

            let meta = entry.metadata().await?;
            let file_type = meta.file_type();

            if file_type.is_dir() {
                let subdir = tokio::fs::read_dir(&path)
                    .await
                    .with_ctx(|_| (ErrorKind::Filesystem, format!("readdir {path:?}")))?;
                handle_devices(guid, rootfs, subdir, guards, &narrowed).await?;
                continue;
            }

            let kind = match (file_type.is_char_device(), file_type.is_block_device()) {
                (true, _) => "c",
                (false, true) => "b",
                (false, false) => continue,
            };

            // Split the raw device number: major from bits 8..20, minor from
            // bits 0..8 combined with bits 20..32.
            let rdev = meta.st_rdev();
            let major = ((rdev >> 8) & 0xfff) as u32;
            let minor_low = rdev & 0xff;
            let minor_high = (rdev >> 12) & 0xfff00;
            let minor = (minor_low | minor_high) as u32;

            Command::new("lxc-cgroup")
                .arg(guid)
                .arg("devices.allow")
                .arg(format!("{kind} {major}:{minor} rwm"))
                .invoke(ErrorKind::Lxc)
                .await?;

            // Mirror the host path inside the container's root filesystem.
            let target = rootfs.join(path.strip_prefix("/").unwrap_or(&path));
            let guard = MountGuard::mount(&Bind::new(&path), target, ReadWrite).await?;
            guards.push(guard);
        }
        Ok(())
    }
    .boxed()
}
/// Handle to a single LXC container together with the mounts that back it.
///
/// NOTE(review): field order is left untouched on purpose; Rust drops fields
/// in declaration order and several of these are mount guards.
pub struct LxcContainer {
// Non-owning reference back to the manager (stored via `Arc::downgrade`).
manager: Weak<LxcManager>,
// Overlay mount for the container's root filesystem; device nodes are
// bind-mounted beneath `rootfs.path()`.
rootfs: OverlayGuard<TmpMountGuard>,
// Container identifier; also passed as the LXC container name.
pub guid: Arc<ContainerId>,
// Mount guard unmounted during teardown before the log and device mounts.
// presumably backs the RPC socket directory — TODO confirm
rpc_bind: TmpMountGuard,
// Optional log mount; unmounted during teardown when present.
log_mount: Option<MountGuard>,
// Bind mounts for device nodes collected by `handle_devices`; empty unless
// `config.hardware_acceleration` is set. Unmounted one by one on teardown.
devices: Vec<MountGuard>,
// Options this container was created with.
config: LxcConfig,
// Initialised to `false` on creation.
// presumably flips once the container has been shut down — TODO confirm
exited: bool,
}
@@ -174,10 +257,7 @@ impl LxcContainer {
let machine_id = hex::encode(rand::random::<[u8; 16]>());
let container_dir = Path::new(LXC_CONTAINER_DIR).join(&*guid);
tokio::fs::create_dir_all(&container_dir).await?;
let mut config_str = format!(include_str!("./config.template"), guid = &*guid);
if config.gpu_acceleration {
config_str += include_str!("./gpu_config");
}
let config_str = format!(include_str!("./config.template"), guid = &*guid);
tokio::fs::write(container_dir.join("config"), config_str).await?;
// TODO: append config
let rootfs_dir = container_dir.join("rootfs");
@@ -249,11 +329,24 @@ impl LxcContainer {
.arg("--name")
.arg(&*guid)
.arg("-o")
.arg("/tmp/lxc.log")
.arg(format!("/run/startos/LXC_{guid}.log"))
.arg("-l")
.arg("debug")
.arg("DEBUG")
.invoke(ErrorKind::Lxc)
.await?;
let mut devices = Vec::new();
if config.hardware_acceleration {
handle_devices(
&*guid,
rootfs.path(),
tokio::fs::read_dir("/dev")
.await
.with_ctx(|_| (ErrorKind::Filesystem, "readdir /dev"))?,
&mut devices,
HARDWARE_ACCELERATION_PATHS,
)
.await?;
}
Ok(Self {
manager: Arc::downgrade(manager),
rootfs,
@@ -262,6 +355,7 @@ impl LxcContainer {
config,
exited: false,
log_mount,
devices,
})
}
@@ -333,7 +427,10 @@ impl LxcContainer {
.await?;
self.rpc_bind.take().unmount().await?;
if let Some(log_mount) = self.log_mount.take() {
log_mount.unmount(true).await?;
log_mount.unmount(false).await?;
}
for device in std::mem::take(&mut self.devices) {
device.unmount(false).await?;
}
self.rootfs.take().unmount(true).await?;
let rootfs_path = self.rootfs_dir();
@@ -419,7 +516,7 @@ impl Drop for LxcContainer {
/// Options supplied when an [`LxcContainer`] is created and kept on it as
/// `config`.
#[derive(Default, Serialize)]
pub struct LxcConfig {
// NOTE(review): appears to be the pre-rename spelling of
// `hardware_acceleration` (the V1 manifest conversion still reads
// `d.gpu_acceleration`) — confirm only one of these two fields should remain.
pub gpu_acceleration: bool,
/// When `true`, container creation walks `/dev` and exposes the device
/// nodes matching `HARDWARE_ACCELERATION_PATHS` to the container.
pub hardware_acceleration: bool,
}
pub async fn connect(ctx: &RpcContext, container: &LxcContainer) -> Result<Guid, Error> {

View File

@@ -251,7 +251,7 @@ impl TryFrom<ManifestV1> for Manifest {
git_hash: value.git_hash,
os_version: value.eos_version,
sdk_version: None,
gpu_acceleration: match value.main {
hardware_acceleration: match value.main {
PackageProcedure::Docker(d) => d.gpu_acceleration,
PackageProcedure::Script(_) => false,
},

View File

@@ -63,7 +63,7 @@ pub struct Manifest {
#[serde(default)]
pub hardware_requirements: HardwareRequirements,
#[serde(default)]
pub gpu_acceleration: bool,
pub hardware_acceleration: bool,
pub git_hash: Option<GitHash>,
#[serde(default = "current_version")]
#[ts(type = "string")]

View File

@@ -97,7 +97,7 @@ impl PersistentContainer {
.join(&s9pk.as_manifest().id),
),
LxcConfig {
gpu_acceleration: s9pk.manifest.gpu_acceleration,
hardware_acceleration: s9pk.manifest.hardware_acceleration,
},
)
.await?;

View File

@@ -31,6 +31,7 @@ export type Manifest = {
alerts: Alerts
dependencies: Dependencies
hardwareRequirements: HardwareRequirements
hardwareAcceleration: boolean
gitHash: GitHash | null
osVersion: string
sdkVersion: string | null

View File

@@ -128,16 +128,17 @@ export type SDKManifest = {
/**
* @description (optional) A set of hardware requirements for this service. If the user's machine
* does not meet these requirements, they will not be able to install this service.
* @property {object[]} devices - TODO Aiden confirm type on the left. List of required devices (displays or processors).
* @property {object[]} devices - List of required devices (display or processor).
* `pattern` refers to a regular expression that at least one device of the specified class must match
* `patternDescription` is what will be displayed to the user about what kind of device is required
* @property {number} ram - Minimum RAM requirement (in megabytes MB)
* @property {string[]} arch - List of supported arches
* @example
* ```
TODO Aiden verify below and provide examples for devices
hardwareRequirements: {
devices: [
{ class: 'display', value: '' },
{ class: 'processor', value: '' },
{ class: 'display', pattern: 'CometLake', patternDescription: 'A CometLake (10th generation) Intel Integrated GPU' },
{ class: 'processor', pattern: 'i[3579]-10[0-9]{3}U CPU', patternDescription: 'A 10th Generation Intel i-Series processor' },
],
ram: 8192,
arch: ['x86-64'],
@@ -149,14 +150,30 @@ export type SDKManifest = {
readonly ram?: number | null
readonly arch?: string[] | null
}
/**
* @description Enable access to hardware acceleration devices (such as /dev/dri, or /dev/nvidia*)
*/
readonly hardwareAcceleration?: boolean
}
// this is hacky but idk a more elegant way
type ArchOptions = {
0: ["x86_64", "aarch64"]
1: ["aarch64", "x86_64"]
2: ["x86_64"]
3: ["aarch64"]
0: ["x86_64", "aarch64", "riscv64"]
1: ["aarch64", "x86_64", "riscv64"]
2: ["x86_64", "riscv64", "aarch64"]
3: ["aarch64", "riscv64", "x86_64"]
4: ["riscv64", "x86_64", "aarch64"]
5: ["riscv64", "aarch64", "x86_64"]
6: ["x86_64", "aarch64"]
7: ["aarch64", "x86_64"]
8: ["x86_64", "riscv64"]
9: ["aarch64", "riscv64"]
10: ["riscv64", "aarch64"]
11: ["riscv64", "x86_64"]
12: ["x86_64"]
13: ["aarch64"]
14: ["riscv64"]
}
export type SDKImageInputSpec = {
[A in keyof ArchOptions]: {

View File

@@ -90,5 +90,6 @@ export function buildManifest<
)
: manifest.hardwareRequirements?.arch,
},
hardwareAcceleration: manifest.hardwareAcceleration ?? false,
}
}