feat: preserve volumes on failed install + migrate ext4 to btrfs

- COW snapshot (cp --reflink=always) of package volumes before
  install/update; restore on failure, remove on success
- Automatic ext4→btrfs conversion via btrfs-convert during disk attach
  with e2fsck pre-check and post-conversion defrag
- Probe package-data filesystem during setup.disk.list (on both disk
  and partition level) so the UI can warn about ext4 conversion
- Setup wizard preserve-overwrite dialog shows ext4 warning with
  backup acknowledgment checkbox before allowing preserve
This commit is contained in:
Aiden McClelland
2026-03-17 15:10:03 -06:00
parent c1a328e5ca
commit 900d86ab83
15 changed files with 386 additions and 42 deletions

View File

@@ -25,20 +25,28 @@ pub enum RepairStrategy {
Preen,
Aggressive,
}
/// Probes a block device with `grub-probe -d` and reports its filesystem type.
///
/// The returned string is the probe's standard output with surrounding
/// whitespace trimmed, e.g. `"ext2"` (which is what callers match on for
/// ext4 volumes) or `"btrfs"`.
///
/// # Errors
/// Fails if `grub-probe` cannot be invoked successfully or if its output is
/// not valid UTF-8.
pub async fn detect_filesystem(
    logicalname: impl AsRef<Path> + std::fmt::Debug,
) -> Result<String, Error> {
    let device = logicalname.as_ref();
    let stdout = Command::new("grub-probe")
        .arg("-d")
        .arg(device)
        .invoke(crate::ErrorKind::DiskManagement)
        .await?;
    let reported = String::from_utf8(stdout)?;
    Ok(reported.trim().to_owned())
}
impl RepairStrategy {
pub async fn fsck(
&self,
logicalname: impl AsRef<Path> + std::fmt::Debug,
) -> Result<RequiresReboot, Error> {
match &*String::from_utf8(
Command::new("grub-probe")
.arg("-d")
.arg(logicalname.as_ref())
.invoke(crate::ErrorKind::DiskManagement)
.await?,
)?
.trim()
{
match &*detect_filesystem(&logicalname).await? {
"ext2" => self.e2fsck(logicalname).await,
"btrfs" => self.btrfs_check(logicalname).await,
fs => {

View File

@@ -7,7 +7,7 @@ use rust_i18n::t;
use tokio::process::Command;
use tracing::instrument;
use super::fsck::{RepairStrategy, RequiresReboot};
use super::fsck::{RepairStrategy, RequiresReboot, detect_filesystem};
use super::util::pvscan;
use crate::disk::mount::filesystem::block_dev::BlockDev;
use crate::disk::mount::filesystem::{FileSystem, ReadWrite};
@@ -301,6 +301,37 @@ pub async fn mount_fs<P: AsRef<Path>>(
.with_ctx(|_| (crate::ErrorKind::Filesystem, PASSWORD_PATH))?;
blockdev_path = Path::new("/dev/mapper").join(&full_name);
}
// Convert ext4 → btrfs on the package-data partition if needed
let fs_type = detect_filesystem(&blockdev_path).await?;
if fs_type == "ext2" {
tracing::info!("Running e2fsck before converting {name} from ext4 to btrfs");
Command::new("e2fsck")
.arg("-fy")
.arg(&blockdev_path)
.invoke(ErrorKind::DiskManagement)
.await?;
tracing::info!("Converting {name} from ext4 to btrfs");
Command::new("btrfs-convert")
.arg("--no-progress")
.arg(&blockdev_path)
.invoke(ErrorKind::DiskManagement)
.await?;
// Defragment after conversion for optimal performance
let tmp_mount = datadir.as_ref().join(format!("{name}.convert-tmp"));
tokio::fs::create_dir_all(&tmp_mount).await?;
BlockDev::new(&blockdev_path)
.mount(&tmp_mount, ReadWrite)
.await?;
Command::new("btrfs")
.args(["filesystem", "defragment", "-r"])
.arg(&tmp_mount)
.invoke(ErrorKind::DiskManagement)
.await?;
unmount(&tmp_mount, false).await?;
tokio::fs::remove_dir(&tmp_mount).await?;
}
let reboot = repair.fsck(&blockdev_path).await?;
if !guid.ends_with("_UNENC") {
@@ -342,3 +373,99 @@ pub async fn mount_all_fs<P: AsRef<Path>>(
reboot |= mount_fs(guid, &datadir, "package-data", repair, password).await?;
Ok(reboot)
}
/// Temporarily imports and activates the volume group `guid`, opens LUKS on
/// its `package-data` logical volume (skipped when the guid ends in
/// `_UNENC`), and probes the filesystem type with `detect_filesystem`.
///
/// Returns `Ok(None)` when the VG cannot be imported or activated, or when
/// the `package-data` LV does not exist. On success returns
/// `Ok(Some(fs_type))`.
///
/// # Errors
/// Failures while writing the key file, opening LUKS, or running the probe
/// are returned as `Err`. Cleanup (LUKS close, VG deactivate, VG export) is
/// still attempted first, with cleanup failures only logged.
#[instrument(skip_all)]
pub async fn probe_package_data_fs(guid: &str) -> Result<Option<String>, Error> {
    // Message emitted by `vgimport` when the VG is already imported.
    let not_exported = format!("Volume group \"{}\" is not exported", guid);

    // Import and activate the VG
    match Command::new("vgimport")
        .arg(guid)
        .invoke(ErrorKind::DiskManagement)
        .await
    {
        Ok(_) => {}
        Err(e)
            if e.source
                .to_string()
                .lines()
                .any(|l| l.trim() == not_exported) =>
        {
            // Already imported, that's fine
        }
        Err(e) => {
            tracing::warn!("Could not import VG {guid} for filesystem probe: {e}");
            return Ok(None);
        }
    }
    if let Err(e) = Command::new("vgchange")
        .arg("-ay")
        .arg(guid)
        .invoke(ErrorKind::DiskManagement)
        .await
    {
        tracing::warn!("Could not activate VG {guid} for filesystem probe: {e}");
        return Ok(None);
    }

    // Tracks whether the LUKS mapping was opened so cleanup knows to close it.
    let mut opened_luks = false;
    let result = async {
        let lv_path = Path::new("/dev").join(guid).join("package-data");
        if tokio::fs::metadata(&lv_path).await.is_err() {
            return Ok(None);
        }
        let blockdev_path = if !guid.ends_with("_UNENC") {
            let full_name = format!("{guid}_package-data");
            let password = DEFAULT_PASSWORD;
            if let Some(parent) = Path::new(PASSWORD_PATH).parent() {
                tokio::fs::create_dir_all(parent).await?;
            }
            tokio::fs::write(PASSWORD_PATH, password)
                .await
                .with_ctx(|_| (ErrorKind::Filesystem, PASSWORD_PATH))?;
            let open_result = Command::new("cryptsetup")
                .arg("-q")
                .arg("luksOpen")
                .arg("--allow-discards")
                .arg(format!("--key-file={PASSWORD_PATH}"))
                .arg(format!("--keyfile-size={}", password.len()))
                .arg(&lv_path)
                .arg(&full_name)
                .invoke(ErrorKind::DiskManagement)
                .await;
            // Remove the key file before propagating any luksOpen error so the
            // key material never lingers on disk after a failed open.
            let _ = tokio::fs::remove_file(PASSWORD_PATH).await;
            open_result?;
            opened_luks = true;
            PathBuf::from(format!("/dev/mapper/{full_name}"))
        } else {
            lv_path
        };
        detect_filesystem(&blockdev_path).await.map(Some)
    }
    .await;

    // Always clean up: close LUKS, deactivate VG, export VG
    if opened_luks {
        let full_name = format!("{guid}_package-data");
        Command::new("cryptsetup")
            .arg("-q")
            .arg("luksClose")
            .arg(&full_name)
            .invoke(ErrorKind::DiskManagement)
            .await
            .log_err();
    }
    Command::new("vgchange")
        .arg("-an")
        .arg(guid)
        .invoke(ErrorKind::DiskManagement)
        .await
        .log_err();
    Command::new("vgexport")
        .arg(guid)
        .invoke(ErrorKind::DiskManagement)
        .await
        .log_err();
    result
}

View File

@@ -41,6 +41,7 @@ pub struct DiskInfo {
pub partitions: Vec<PartitionInfo>,
pub capacity: u64,
pub guid: Option<InternedString>,
pub filesystem: Option<String>,
}
#[derive(Clone, Debug, Deserialize, Serialize, ts_rs::TS)]
@@ -55,6 +56,7 @@ pub struct PartitionInfo {
pub used: Option<u64>,
pub start_os: BTreeMap<String, StartOsRecoveryInfo>,
pub guid: Option<InternedString>,
pub filesystem: Option<String>,
}
#[derive(Clone, Debug, Default, Deserialize, Serialize, ts_rs::TS)]
@@ -374,6 +376,15 @@ pub async fn list(os: &OsPartitionInfo) -> Result<Vec<DiskInfo>, Error> {
disk_info.capacity = part_info.capacity;
if let Some(g) = disk_guids.get(&disk_info.logicalname) {
disk_info.guid = g.clone();
if let Some(guid) = g {
disk_info.filesystem =
crate::disk::main::probe_package_data_fs(guid)
.await
.unwrap_or_else(|e| {
tracing::warn!("Failed to probe filesystem for {guid}: {e}");
None
});
}
} else {
disk_info.partitions = vec![part_info];
}
@@ -384,11 +395,31 @@ pub async fn list(os: &OsPartitionInfo) -> Result<Vec<DiskInfo>, Error> {
disk_info.partitions = Vec::with_capacity(index.parts.len());
if let Some(g) = disk_guids.get(&disk_info.logicalname) {
disk_info.guid = g.clone();
if let Some(guid) = g {
disk_info.filesystem =
crate::disk::main::probe_package_data_fs(guid)
.await
.unwrap_or_else(|e| {
tracing::warn!("Failed to probe filesystem for {guid}: {e}");
None
});
}
} else {
for part in index.parts {
let mut part_info = part_info(part).await;
if let Some(g) = disk_guids.get(&part_info.logicalname) {
part_info.guid = g.clone();
if let Some(guid) = g {
part_info.filesystem =
crate::disk::main::probe_package_data_fs(guid)
.await
.unwrap_or_else(|e| {
tracing::warn!(
"Failed to probe filesystem for {guid}: {e}"
);
None
});
}
}
disk_info.partitions.push(part_info);
}
@@ -461,6 +492,7 @@ async fn disk_info(disk: PathBuf) -> DiskInfo {
partitions: Vec::new(),
capacity,
guid: None,
filesystem: None,
}
}
@@ -544,6 +576,7 @@ async fn part_info(part: PathBuf) -> PartitionInfo {
used,
start_os,
guid: None,
filesystem: None,
}
}

View File

@@ -422,11 +422,15 @@ impl Service {
tracing::error!("Error installing service: {e}");
tracing::debug!("{e:?}")
}) {
crate::volume::remove_install_backup(id).await.log_err();
return Ok(Some(service));
}
}
}
cleanup(ctx, id, false).await.log_err();
crate::volume::restore_volumes_from_install_backup(id)
.await
.log_err();
ctx.db
.mutate(|v| v.as_public_mut().as_package_data_mut().remove(id))
.await
@@ -461,37 +465,60 @@ impl Service {
tracing::error!("Error installing service: {e}");
tracing::debug!("{e:?}")
}) {
crate::volume::remove_install_backup(id).await.log_err();
return Ok(Some(service));
}
}
}
let s9pk = S9pk::open(s9pk_path, Some(id)).await?;
ctx.db
.mutate({
|db| {
db.as_public_mut()
.as_package_data_mut()
.as_idx_mut(id)
.or_not_found(id)?
.as_state_info_mut()
.map_mutate(|s| {
if let PackageState::Updating(UpdatingState {
manifest, ..
}) = s
{
Ok(PackageState::Installed(InstalledState { manifest }))
} else {
Err(Error::new(
eyre!("{}", t!("service.mod.race-condition-detected")),
ErrorKind::Database,
))
}
})
}
})
.await
.result?;
handle_installed(s9pk).await
match async {
let s9pk = S9pk::open(s9pk_path, Some(id)).await?;
ctx.db
.mutate({
|db| {
db.as_public_mut()
.as_package_data_mut()
.as_idx_mut(id)
.or_not_found(id)?
.as_state_info_mut()
.map_mutate(|s| {
if let PackageState::Updating(UpdatingState {
manifest,
..
}) = s
{
Ok(PackageState::Installed(InstalledState { manifest }))
} else {
Err(Error::new(
eyre!(
"{}",
t!("service.mod.race-condition-detected")
),
ErrorKind::Database,
))
}
})
}
})
.await
.result?;
handle_installed(s9pk).await
}
.await
{
Ok(service) => {
crate::volume::remove_install_backup(id).await.log_err();
Ok(service)
}
Err(e) => {
tracing::error!(
"Update rollback failed for {id}, restoring volume snapshot: {e}"
);
crate::volume::restore_volumes_from_install_backup(id)
.await
.log_err();
Err(e)
}
}
}
PackageStateMatchModelRef::Removing(_) | PackageStateMatchModelRef::Restoring(_) => {
if let Ok(s9pk) = S9pk::open(s9pk_path, Some(id)).await.map_err(|e| {

View File

@@ -307,6 +307,8 @@ impl ServiceMap {
finalization_progress.start();
let s9pk = S9pk::open(&installed_path, Some(&id)).await?;
let data_version = get_data_version(&id).await?;
// Snapshot existing volumes before install/update modifies them
crate::volume::snapshot_volumes_for_install(&id).await?;
let prev = if let Some(service) = service.take() {
ensure_code!(
recovery_source.is_none(),
@@ -382,6 +384,8 @@ impl ServiceMap {
cleanup.await?;
}
crate::volume::remove_install_backup(&id).await.log_err();
drop(service);
sync_progress_task.await.map_err(|_| {

View File

@@ -1,13 +1,19 @@
use std::path::{Path, PathBuf};
use tokio::process::Command;
use crate::PackageId;
pub use crate::VolumeId;
use crate::prelude::*;
use crate::util::Invoke;
use crate::util::VersionString;
use crate::DATA_DIR;
pub const PKG_VOLUME_DIR: &str = "package-data/volumes";
pub const BACKUP_DIR: &str = "/media/startos/backups";
const INSTALL_BACKUP_SUFFIX: &str = ".install-backup";
pub fn data_dir<P: AsRef<Path>>(datadir: P, pkg_id: &PackageId, volume_id: &VolumeId) -> PathBuf {
datadir
.as_ref()
@@ -33,3 +39,70 @@ pub fn asset_dir<P: AsRef<Path>>(
/// Builds the backup data path for a package: `BACKUP_DIR/<pkg_id>/data`.
pub fn backup_dir(pkg_id: &PackageId) -> PathBuf {
    let pkg_backups = Path::new(BACKUP_DIR).join(pkg_id);
    pkg_backups.join("data")
}
/// Directory holding all volumes of `pkg_id`:
/// `DATA_DIR/PKG_VOLUME_DIR/<pkg_id>`.
fn pkg_volume_dir(pkg_id: &PackageId) -> PathBuf {
    let volumes_root = Path::new(DATA_DIR).join(PKG_VOLUME_DIR);
    volumes_root.join(pkg_id)
}
/// Location of the pre-install snapshot for `pkg_id`: a sibling of the
/// package's volume directory named `<pkg_id>` plus `INSTALL_BACKUP_SUFFIX`.
fn install_backup_path(pkg_id: &PackageId) -> PathBuf {
    let backup_name = format!("{pkg_id}{INSTALL_BACKUP_SUFFIX}");
    let volumes_root = Path::new(DATA_DIR).join(PKG_VOLUME_DIR);
    volumes_root.join(backup_name)
}
/// Snapshots the package volume directory ahead of an install or update.
///
/// The copy is made with `cp -a --reflink=always`, so it only succeeds where
/// the filesystem can reflink; when the copy fails a warning is logged, any
/// partial copy is deleted, and the install proceeds without a backup.
///
/// Returns `Ok(true)` if a backup was created, and `Ok(false)` if the package
/// has no volume directory yet or the copy failed.
///
/// # Errors
/// Fails only if deleting a stale or partial backup directory fails.
pub async fn snapshot_volumes_for_install(pkg_id: &PackageId) -> Result<bool, Error> {
    let volumes = pkg_volume_dir(pkg_id);
    let has_volumes = tokio::fs::metadata(&volumes).await.is_ok();
    if !has_volumes {
        return Ok(false);
    }

    let dst = install_backup_path(pkg_id);
    // A leftover backup from an earlier failed attempt must not survive.
    crate::util::io::delete_dir(&dst).await?;

    let copy_outcome = Command::new("cp")
        .arg("-a")
        .arg("--reflink=always")
        .arg(&volumes)
        .arg(&dst)
        .invoke(ErrorKind::Filesystem)
        .await;

    if let Err(e) = copy_outcome {
        tracing::warn!(
            "Could not create install backup for {pkg_id} \
            (filesystem may not support reflinks): {e}"
        );
        // Discard whatever was copied before the failure.
        crate::util::io::delete_dir(&dst).await?;
        return Ok(false);
    }

    tracing::info!("Created install backup for {pkg_id} at {dst:?}");
    Ok(true)
}
/// Puts the pre-install snapshot back in place after a failed install.
///
/// When a backup exists, the current volume directory is deleted and the
/// backup is renamed into its place. Does nothing when there is no backup.
///
/// # Errors
/// Fails if deleting the current volume directory or renaming the backup
/// fails.
pub async fn restore_volumes_from_install_backup(pkg_id: &PackageId) -> Result<(), Error> {
    let backup = install_backup_path(pkg_id);
    let backup_exists = tokio::fs::metadata(&backup).await.is_ok();
    if backup_exists {
        let live = pkg_volume_dir(pkg_id);
        // Drop the possibly half-modified volumes before moving the snapshot in.
        crate::util::io::delete_dir(&live).await?;
        crate::util::io::rename(&backup, &live).await?;
        tracing::info!("Restored volumes from install backup for {pkg_id}");
    }
    Ok(())
}
/// Deletes the pre-install snapshot for `pkg_id` once it is no longer needed
/// (e.g. after a successful install).
pub async fn remove_install_backup(pkg_id: &PackageId) -> Result<(), Error> {
    let backup = install_backup_path(pkg_id);
    crate::util::io::delete_dir(&backup).await
}