mirror of
https://github.com/Start9Labs/start-os.git
synced 2026-03-30 12:11:56 +00:00
hardware acceleration and support for NVIDIA cards on nonfree images (#3089)
* add nvidia packages
* add nvidia deps to nonfree
* gpu_acceleration flag & nvidia hacking
* fix gpu_config & /tmp/lxc.log
* implement hardware acceleration more dynamically
* refactor OpenUI
* use mknod
* registry updates for multi-hardware-requirements
* pluralize
* handle new registry types
* remove log
* migrations and driver fixes
* wip
* misc patches
* handle nvidia-container differently
* chore: comments (#3093)
* chore: comments
* revert some sizing
---------
Co-authored-by: Matt Hill <mattnine@protonmail.com>
* Revert "handle nvidia-container differently"
This reverts commit d708ae53df.
* fix debian containers
* cleanup
* feat: add empty array placeholder in forms (#3095)
* fixes from testing, client side device filtering for better fingerprinting resistance
* fix mac builds
---------
Co-authored-by: Sam Sartor <me@samsartor.com>
Co-authored-by: Matt Hill <mattnine@protonmail.com>
Co-authored-by: Alex Inkin <alexander@inkin.ru>
This commit is contained in:
@@ -5,11 +5,13 @@ use std::sync::{Arc, Weak};
|
||||
use std::time::Duration;
|
||||
|
||||
use clap::builder::ValueParserFactory;
|
||||
use futures::StreamExt;
|
||||
use futures::future::BoxFuture;
|
||||
use futures::{FutureExt, StreamExt};
|
||||
use imbl_value::InternedString;
|
||||
use rpc_toolkit::yajrc::RpcError;
|
||||
use rpc_toolkit::{RpcRequest, RpcResponse};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::fs::ReadDir;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::Mutex;
|
||||
@@ -27,7 +29,7 @@ use crate::disk::mount::util::unmount;
|
||||
use crate::prelude::*;
|
||||
use crate::rpc_continuations::{Guid, RpcContinuation};
|
||||
use crate::service::ServiceStats;
|
||||
use crate::util::io::open_file;
|
||||
use crate::util::io::{open_file, write_file_owned_atomic};
|
||||
use crate::util::rpc_client::UnixRpcClient;
|
||||
use crate::util::{FromStrParser, Invoke, new_guid};
|
||||
use crate::{InvalidId, PackageId};
|
||||
@@ -37,6 +39,7 @@ const RPC_DIR: &str = "media/startos/rpc"; // must not be absolute path
|
||||
pub const CONTAINER_RPC_SERVER_SOCKET: &str = "service.sock"; // must not be absolute path
|
||||
pub const HOST_RPC_SERVER_SOCKET: &str = "host.sock"; // must not be absolute path
|
||||
const CONTAINER_DHCP_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
// Host device paths that are mirrored into a container when its config has
// `hardware_acceleration` set. An entry ending in `*` is matched as a string prefix
// of the device path; any other entry matches that exact path and everything beneath it.
const HARDWARE_ACCELERATION_PATHS: &[&str] = &["/dev/dri", "/dev/nvidia*", "/dev/kfd"];
|
||||
|
||||
#[derive(
|
||||
Clone, Debug, Serialize, Deserialize, Default, PartialEq, Eq, PartialOrd, Ord, Hash, TS,
|
||||
@@ -174,12 +177,8 @@ impl LxcContainer {
|
||||
let machine_id = hex::encode(rand::random::<[u8; 16]>());
|
||||
let container_dir = Path::new(LXC_CONTAINER_DIR).join(&*guid);
|
||||
tokio::fs::create_dir_all(&container_dir).await?;
|
||||
tokio::fs::write(
|
||||
container_dir.join("config"),
|
||||
format!(include_str!("./config.template"), guid = &*guid),
|
||||
)
|
||||
.await?;
|
||||
// TODO: append config
|
||||
let config_str = format!(include_str!("./config.template"), guid = &*guid);
|
||||
tokio::fs::write(container_dir.join("config"), config_str).await?;
|
||||
let rootfs_dir = container_dir.join("rootfs");
|
||||
let rootfs = OverlayGuard::mount(
|
||||
TmpMountGuard::mount(
|
||||
@@ -197,8 +196,25 @@ impl LxcContainer {
|
||||
&rootfs_dir,
|
||||
)
|
||||
.await?;
|
||||
tokio::fs::write(rootfs_dir.join("etc/machine-id"), format!("{machine_id}\n")).await?;
|
||||
tokio::fs::write(rootfs_dir.join("etc/hostname"), format!("{guid}\n")).await?;
|
||||
Command::new("chown")
|
||||
.arg("100000:100000")
|
||||
.arg(&rootfs_dir)
|
||||
.invoke(ErrorKind::Filesystem)
|
||||
.await?;
|
||||
write_file_owned_atomic(
|
||||
rootfs_dir.join("etc/machine-id"),
|
||||
format!("{machine_id}\n"),
|
||||
100000,
|
||||
100000,
|
||||
)
|
||||
.await?;
|
||||
write_file_owned_atomic(
|
||||
rootfs_dir.join("etc/hostname"),
|
||||
format!("{guid}\n"),
|
||||
100000,
|
||||
100000,
|
||||
)
|
||||
.await?;
|
||||
Command::new("sed")
|
||||
.arg("-i")
|
||||
.arg(format!("s/LXC_NAME/{guid}/g"))
|
||||
@@ -248,9 +264,13 @@ impl LxcContainer {
|
||||
.arg("-d")
|
||||
.arg("--name")
|
||||
.arg(&*guid)
|
||||
.arg("-o")
|
||||
.arg(format!("/run/startos/LXC_{guid}.log"))
|
||||
.arg("-l")
|
||||
.arg("DEBUG")
|
||||
.invoke(ErrorKind::Lxc)
|
||||
.await?;
|
||||
Ok(Self {
|
||||
let res = Self {
|
||||
manager: Arc::downgrade(manager),
|
||||
rootfs,
|
||||
guid: Arc::new(ContainerId::try_from(&*guid)?),
|
||||
@@ -258,7 +278,84 @@ impl LxcContainer {
|
||||
config,
|
||||
exited: false,
|
||||
log_mount,
|
||||
})
|
||||
};
|
||||
if res.config.hardware_acceleration {
|
||||
res.handle_devices(
|
||||
tokio::fs::read_dir("/dev")
|
||||
.await
|
||||
.with_ctx(|_| (ErrorKind::Filesystem, "readdir /dev"))?,
|
||||
HARDWARE_ACCELERATION_PATHS,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
/// Non-Linux stand-in for device passthrough: accepts the same arguments as the
/// Linux implementation, creates no device nodes, and always succeeds.
#[cfg(not(target_os = "linux"))]
async fn handle_devices(&self, _dir: ReadDir, _patterns: &[&str]) -> Result<(), Error> {
    Ok(())
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn handle_devices<'a>(
|
||||
&'a self,
|
||||
mut dir: ReadDir,
|
||||
matches: &'a [&'a str],
|
||||
) -> BoxFuture<'a, Result<(), Error>> {
|
||||
use std::os::linux::fs::MetadataExt;
|
||||
use std::os::unix::fs::FileTypeExt;
|
||||
async move {
|
||||
while let Some(ent) = dir.next_entry().await? {
|
||||
let path = ent.path();
|
||||
if let Some(matches) = if matches.is_empty() {
|
||||
Some(Vec::new())
|
||||
} else {
|
||||
let mut new_matches = Vec::new();
|
||||
for mut m in matches.iter().copied() {
|
||||
let could_match = if let Some(prefix) = m.strip_suffix("*") {
|
||||
m = prefix;
|
||||
path.to_string_lossy().starts_with(m)
|
||||
} else {
|
||||
path.starts_with(m)
|
||||
} || Path::new(m).starts_with(&path);
|
||||
if could_match {
|
||||
new_matches.push(m);
|
||||
}
|
||||
}
|
||||
if new_matches.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(new_matches)
|
||||
}
|
||||
} {
|
||||
let meta = ent.metadata().await?;
|
||||
let ty = meta.file_type();
|
||||
if ty.is_dir() {
|
||||
self.handle_devices(
|
||||
tokio::fs::read_dir(&path).await.with_ctx(|_| {
|
||||
(ErrorKind::Filesystem, format!("readdir {path:?}"))
|
||||
})?,
|
||||
&matches,
|
||||
)
|
||||
.await?;
|
||||
} else {
|
||||
let ty = if ty.is_char_device() {
|
||||
'c'
|
||||
} else if ty.is_block_device() {
|
||||
'b'
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
let rdev = meta.st_rdev();
|
||||
let major = ((rdev >> 8) & 0xfff) as u32;
|
||||
let minor = ((rdev & 0xff) | ((rdev >> 12) & 0xfff00)) as u32;
|
||||
self.mknod(&path, ty, major, minor).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
.boxed()
|
||||
}
|
||||
|
||||
pub fn rootfs_dir(&self) -> &Path {
|
||||
@@ -329,7 +426,7 @@ impl LxcContainer {
|
||||
.await?;
|
||||
self.rpc_bind.take().unmount().await?;
|
||||
if let Some(log_mount) = self.log_mount.take() {
|
||||
log_mount.unmount(true).await?;
|
||||
log_mount.unmount(false).await?;
|
||||
}
|
||||
self.rootfs.take().unmount(true).await?;
|
||||
let rootfs_path = self.rootfs_dir();
|
||||
@@ -351,7 +448,10 @@ impl LxcContainer {
|
||||
.invoke(ErrorKind::Lxc)
|
||||
.await?;
|
||||
|
||||
self.exited = true;
|
||||
#[allow(unused_assignments)]
|
||||
{
|
||||
self.exited = true;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -361,6 +461,17 @@ impl LxcContainer {
|
||||
let sock_path = self.rpc_dir().join(CONTAINER_RPC_SERVER_SOCKET);
|
||||
while tokio::fs::metadata(&sock_path).await.is_err() {
|
||||
if timeout.map_or(false, |t| started.elapsed() > t) {
|
||||
tracing::error!(
|
||||
"{:?}",
|
||||
Command::new("lxc-attach")
|
||||
.arg(&**self.guid)
|
||||
.arg("--")
|
||||
.arg("systemctl")
|
||||
.arg("status")
|
||||
.arg("container-runtime")
|
||||
.invoke(ErrorKind::Unknown)
|
||||
.await
|
||||
);
|
||||
return Err(Error::new(
|
||||
eyre!("timed out waiting for socket"),
|
||||
ErrorKind::Timeout,
|
||||
@@ -371,6 +482,88 @@ impl LxcContainer {
|
||||
tracing::info!("Connected to socket in {:?}", started.elapsed());
|
||||
Ok(UnixRpcClient::new(sock_path))
|
||||
}
|
||||
|
||||
pub async fn mknod(&self, path: &Path, ty: char, major: u32, minor: u32) -> Result<(), Error> {
|
||||
if let Ok(dev_rel) = path.strip_prefix("/dev") {
|
||||
let parent = dev_rel.parent();
|
||||
let media_dev = self.rootfs_dir().join("media/startos/dev");
|
||||
let target_path = media_dev.join(dev_rel);
|
||||
if tokio::fs::metadata(&target_path).await.is_ok() {
|
||||
return Ok(());
|
||||
}
|
||||
if let Some(parent) = parent {
|
||||
let p = media_dev.join(parent);
|
||||
tokio::fs::create_dir_all(&p)
|
||||
.await
|
||||
.with_ctx(|_| (ErrorKind::Filesystem, format!("mkdir -p {p:?}")))?;
|
||||
for p in parent.ancestors() {
|
||||
Command::new("chown")
|
||||
.arg("100000:100000")
|
||||
.arg(media_dev.join(p))
|
||||
.invoke(ErrorKind::Filesystem)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
Command::new("mknod")
|
||||
.arg(&target_path)
|
||||
.arg(&*InternedString::from_display(&ty))
|
||||
.arg(&*InternedString::from_display(&major))
|
||||
.arg(&*InternedString::from_display(&minor))
|
||||
.invoke(ErrorKind::Filesystem)
|
||||
.await?;
|
||||
Command::new("chown")
|
||||
.arg("100000:100000")
|
||||
.arg(&target_path)
|
||||
.invoke(ErrorKind::Filesystem)
|
||||
.await?;
|
||||
if let Some(parent) = parent {
|
||||
Command::new("lxc-attach")
|
||||
.arg(&**self.guid)
|
||||
.arg("--")
|
||||
.arg("mkdir")
|
||||
.arg("-p")
|
||||
.arg(Path::new("/dev").join(parent))
|
||||
.invoke(ErrorKind::Lxc)
|
||||
.await?;
|
||||
}
|
||||
Command::new("lxc-attach")
|
||||
.arg(&**self.guid)
|
||||
.arg("--")
|
||||
.arg("touch")
|
||||
.arg(&path)
|
||||
.invoke(ErrorKind::Lxc)
|
||||
.await?;
|
||||
Command::new("lxc-attach")
|
||||
.arg(&**self.guid)
|
||||
.arg("--")
|
||||
.arg("mount")
|
||||
.arg("--bind")
|
||||
.arg(Path::new("/media/startos/dev").join(dev_rel))
|
||||
.arg(&path)
|
||||
.invoke(ErrorKind::Lxc)
|
||||
.await?;
|
||||
} else {
|
||||
let target_path = self
|
||||
.rootfs_dir()
|
||||
.join(path.strip_prefix("/").unwrap_or(&path));
|
||||
if tokio::fs::metadata(&target_path).await.is_ok() {
|
||||
return Ok(());
|
||||
}
|
||||
Command::new("mknod")
|
||||
.arg(&target_path)
|
||||
.arg(&*InternedString::from_display(&ty))
|
||||
.arg(&*InternedString::from_display(&major))
|
||||
.arg(&*InternedString::from_display(&minor))
|
||||
.invoke(ErrorKind::Filesystem)
|
||||
.await?;
|
||||
Command::new("chown")
|
||||
.arg("100000:100000")
|
||||
.arg(&target_path)
|
||||
.invoke(ErrorKind::Filesystem)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
impl Drop for LxcContainer {
|
||||
fn drop(&mut self) {
|
||||
@@ -414,7 +607,10 @@ impl Drop for LxcContainer {
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct LxcConfig {}
|
||||
pub struct LxcConfig {
|
||||
pub hardware_acceleration: bool,
|
||||
}
|
||||
|
||||
pub async fn connect(ctx: &RpcContext, container: &LxcContainer) -> Result<Guid, Error> {
|
||||
use axum::extract::ws::Message;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user