mirror of
https://github.com/Start9Labs/start-os.git
synced 2026-03-30 04:01:58 +00:00
feat: preserve volumes on failed install + migrate ext4 to btrfs
- COW snapshot (cp --reflink=always) of package volumes before install/update; restore on failure, remove on success
- Automatic ext4→btrfs conversion via btrfs-convert during disk attach, with an e2fsck pre-check and post-conversion defrag
- Probe the package-data filesystem during setup.disk.list (at both disk and partition level) so the UI can warn about ext4 conversion
- Setup wizard preserve-overwrite dialog shows an ext4 warning with a backup acknowledgment checkbox before allowing preserve
This commit is contained in:
@@ -25,20 +25,28 @@ pub enum RepairStrategy {
|
||||
Preen,
|
||||
Aggressive,
|
||||
}
|
||||
/// Detects the filesystem type of a block device using `grub-probe`.
|
||||
/// Returns e.g. `"ext2"` (for ext4), `"btrfs"`, etc.
|
||||
pub async fn detect_filesystem(
|
||||
logicalname: impl AsRef<Path> + std::fmt::Debug,
|
||||
) -> Result<String, Error> {
|
||||
Ok(String::from_utf8(
|
||||
Command::new("grub-probe")
|
||||
.arg("-d")
|
||||
.arg(logicalname.as_ref())
|
||||
.invoke(crate::ErrorKind::DiskManagement)
|
||||
.await?,
|
||||
)?
|
||||
.trim()
|
||||
.to_owned())
|
||||
}
|
||||
|
||||
impl RepairStrategy {
|
||||
pub async fn fsck(
|
||||
&self,
|
||||
logicalname: impl AsRef<Path> + std::fmt::Debug,
|
||||
) -> Result<RequiresReboot, Error> {
|
||||
match &*String::from_utf8(
|
||||
Command::new("grub-probe")
|
||||
.arg("-d")
|
||||
.arg(logicalname.as_ref())
|
||||
.invoke(crate::ErrorKind::DiskManagement)
|
||||
.await?,
|
||||
)?
|
||||
.trim()
|
||||
{
|
||||
match &*detect_filesystem(&logicalname).await? {
|
||||
"ext2" => self.e2fsck(logicalname).await,
|
||||
"btrfs" => self.btrfs_check(logicalname).await,
|
||||
fs => {
|
||||
|
||||
@@ -7,7 +7,7 @@ use rust_i18n::t;
|
||||
use tokio::process::Command;
|
||||
use tracing::instrument;
|
||||
|
||||
use super::fsck::{RepairStrategy, RequiresReboot};
|
||||
use super::fsck::{RepairStrategy, RequiresReboot, detect_filesystem};
|
||||
use super::util::pvscan;
|
||||
use crate::disk::mount::filesystem::block_dev::BlockDev;
|
||||
use crate::disk::mount::filesystem::{FileSystem, ReadWrite};
|
||||
@@ -301,6 +301,37 @@ pub async fn mount_fs<P: AsRef<Path>>(
|
||||
.with_ctx(|_| (crate::ErrorKind::Filesystem, PASSWORD_PATH))?;
|
||||
blockdev_path = Path::new("/dev/mapper").join(&full_name);
|
||||
}
|
||||
|
||||
// Convert ext4 → btrfs on the package-data partition if needed
|
||||
let fs_type = detect_filesystem(&blockdev_path).await?;
|
||||
if fs_type == "ext2" {
|
||||
tracing::info!("Running e2fsck before converting {name} from ext4 to btrfs");
|
||||
Command::new("e2fsck")
|
||||
.arg("-fy")
|
||||
.arg(&blockdev_path)
|
||||
.invoke(ErrorKind::DiskManagement)
|
||||
.await?;
|
||||
tracing::info!("Converting {name} from ext4 to btrfs");
|
||||
Command::new("btrfs-convert")
|
||||
.arg("--no-progress")
|
||||
.arg(&blockdev_path)
|
||||
.invoke(ErrorKind::DiskManagement)
|
||||
.await?;
|
||||
// Defragment after conversion for optimal performance
|
||||
let tmp_mount = datadir.as_ref().join(format!("{name}.convert-tmp"));
|
||||
tokio::fs::create_dir_all(&tmp_mount).await?;
|
||||
BlockDev::new(&blockdev_path)
|
||||
.mount(&tmp_mount, ReadWrite)
|
||||
.await?;
|
||||
Command::new("btrfs")
|
||||
.args(["filesystem", "defragment", "-r"])
|
||||
.arg(&tmp_mount)
|
||||
.invoke(ErrorKind::DiskManagement)
|
||||
.await?;
|
||||
unmount(&tmp_mount, false).await?;
|
||||
tokio::fs::remove_dir(&tmp_mount).await?;
|
||||
}
|
||||
|
||||
let reboot = repair.fsck(&blockdev_path).await?;
|
||||
|
||||
if !guid.ends_with("_UNENC") {
|
||||
@@ -342,3 +373,99 @@ pub async fn mount_all_fs<P: AsRef<Path>>(
|
||||
reboot |= mount_fs(guid, &datadir, "package-data", repair, password).await?;
|
||||
Ok(reboot)
|
||||
}
|
||||
|
||||
/// Temporarily activates a VG and opens LUKS to probe the `package-data`
|
||||
/// filesystem type. Returns `None` if probing fails (e.g. LV doesn't exist).
|
||||
#[instrument(skip_all)]
|
||||
pub async fn probe_package_data_fs(guid: &str) -> Result<Option<String>, Error> {
|
||||
// Import and activate the VG
|
||||
match Command::new("vgimport")
|
||||
.arg(guid)
|
||||
.invoke(ErrorKind::DiskManagement)
|
||||
.await
|
||||
{
|
||||
Ok(_) => {}
|
||||
Err(e)
|
||||
if format!("{}", e.source)
|
||||
.lines()
|
||||
.any(|l| l.trim() == format!("Volume group \"{}\" is not exported", guid)) =>
|
||||
{
|
||||
// Already imported, that's fine
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Could not import VG {guid} for filesystem probe: {e}");
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
if let Err(e) = Command::new("vgchange")
|
||||
.arg("-ay")
|
||||
.arg(guid)
|
||||
.invoke(ErrorKind::DiskManagement)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("Could not activate VG {guid} for filesystem probe: {e}");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut opened_luks = false;
|
||||
let result = async {
|
||||
let lv_path = Path::new("/dev").join(guid).join("package-data");
|
||||
if tokio::fs::metadata(&lv_path).await.is_err() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let blockdev_path = if !guid.ends_with("_UNENC") {
|
||||
let full_name = format!("{guid}_package-data");
|
||||
let password = DEFAULT_PASSWORD;
|
||||
if let Some(parent) = Path::new(PASSWORD_PATH).parent() {
|
||||
tokio::fs::create_dir_all(parent).await?;
|
||||
}
|
||||
tokio::fs::write(PASSWORD_PATH, password)
|
||||
.await
|
||||
.with_ctx(|_| (ErrorKind::Filesystem, PASSWORD_PATH))?;
|
||||
Command::new("cryptsetup")
|
||||
.arg("-q")
|
||||
.arg("luksOpen")
|
||||
.arg("--allow-discards")
|
||||
.arg(format!("--key-file={PASSWORD_PATH}"))
|
||||
.arg(format!("--keyfile-size={}", password.len()))
|
||||
.arg(&lv_path)
|
||||
.arg(&full_name)
|
||||
.invoke(ErrorKind::DiskManagement)
|
||||
.await?;
|
||||
let _ = tokio::fs::remove_file(PASSWORD_PATH).await;
|
||||
opened_luks = true;
|
||||
PathBuf::from(format!("/dev/mapper/{full_name}"))
|
||||
} else {
|
||||
lv_path.clone()
|
||||
};
|
||||
|
||||
detect_filesystem(&blockdev_path).await.map(Some)
|
||||
}
|
||||
.await;
|
||||
|
||||
// Always clean up: close LUKS, deactivate VG, export VG
|
||||
if opened_luks {
|
||||
let full_name = format!("{guid}_package-data");
|
||||
Command::new("cryptsetup")
|
||||
.arg("-q")
|
||||
.arg("luksClose")
|
||||
.arg(&full_name)
|
||||
.invoke(ErrorKind::DiskManagement)
|
||||
.await
|
||||
.log_err();
|
||||
}
|
||||
Command::new("vgchange")
|
||||
.arg("-an")
|
||||
.arg(guid)
|
||||
.invoke(ErrorKind::DiskManagement)
|
||||
.await
|
||||
.log_err();
|
||||
Command::new("vgexport")
|
||||
.arg(guid)
|
||||
.invoke(ErrorKind::DiskManagement)
|
||||
.await
|
||||
.log_err();
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ pub struct DiskInfo {
|
||||
pub partitions: Vec<PartitionInfo>,
|
||||
pub capacity: u64,
|
||||
pub guid: Option<InternedString>,
|
||||
pub filesystem: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize, ts_rs::TS)]
|
||||
@@ -55,6 +56,7 @@ pub struct PartitionInfo {
|
||||
pub used: Option<u64>,
|
||||
pub start_os: BTreeMap<String, StartOsRecoveryInfo>,
|
||||
pub guid: Option<InternedString>,
|
||||
pub filesystem: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize, ts_rs::TS)]
|
||||
@@ -374,6 +376,15 @@ pub async fn list(os: &OsPartitionInfo) -> Result<Vec<DiskInfo>, Error> {
|
||||
disk_info.capacity = part_info.capacity;
|
||||
if let Some(g) = disk_guids.get(&disk_info.logicalname) {
|
||||
disk_info.guid = g.clone();
|
||||
if let Some(guid) = g {
|
||||
disk_info.filesystem =
|
||||
crate::disk::main::probe_package_data_fs(guid)
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
tracing::warn!("Failed to probe filesystem for {guid}: {e}");
|
||||
None
|
||||
});
|
||||
}
|
||||
} else {
|
||||
disk_info.partitions = vec![part_info];
|
||||
}
|
||||
@@ -384,11 +395,31 @@ pub async fn list(os: &OsPartitionInfo) -> Result<Vec<DiskInfo>, Error> {
|
||||
disk_info.partitions = Vec::with_capacity(index.parts.len());
|
||||
if let Some(g) = disk_guids.get(&disk_info.logicalname) {
|
||||
disk_info.guid = g.clone();
|
||||
if let Some(guid) = g {
|
||||
disk_info.filesystem =
|
||||
crate::disk::main::probe_package_data_fs(guid)
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
tracing::warn!("Failed to probe filesystem for {guid}: {e}");
|
||||
None
|
||||
});
|
||||
}
|
||||
} else {
|
||||
for part in index.parts {
|
||||
let mut part_info = part_info(part).await;
|
||||
if let Some(g) = disk_guids.get(&part_info.logicalname) {
|
||||
part_info.guid = g.clone();
|
||||
if let Some(guid) = g {
|
||||
part_info.filesystem =
|
||||
crate::disk::main::probe_package_data_fs(guid)
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
tracing::warn!(
|
||||
"Failed to probe filesystem for {guid}: {e}"
|
||||
);
|
||||
None
|
||||
});
|
||||
}
|
||||
}
|
||||
disk_info.partitions.push(part_info);
|
||||
}
|
||||
@@ -461,6 +492,7 @@ async fn disk_info(disk: PathBuf) -> DiskInfo {
|
||||
partitions: Vec::new(),
|
||||
capacity,
|
||||
guid: None,
|
||||
filesystem: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -544,6 +576,7 @@ async fn part_info(part: PathBuf) -> PartitionInfo {
|
||||
used,
|
||||
start_os,
|
||||
guid: None,
|
||||
filesystem: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -422,11 +422,15 @@ impl Service {
|
||||
tracing::error!("Error installing service: {e}");
|
||||
tracing::debug!("{e:?}")
|
||||
}) {
|
||||
crate::volume::remove_install_backup(id).await.log_err();
|
||||
return Ok(Some(service));
|
||||
}
|
||||
}
|
||||
}
|
||||
cleanup(ctx, id, false).await.log_err();
|
||||
crate::volume::restore_volumes_from_install_backup(id)
|
||||
.await
|
||||
.log_err();
|
||||
ctx.db
|
||||
.mutate(|v| v.as_public_mut().as_package_data_mut().remove(id))
|
||||
.await
|
||||
@@ -461,37 +465,60 @@ impl Service {
|
||||
tracing::error!("Error installing service: {e}");
|
||||
tracing::debug!("{e:?}")
|
||||
}) {
|
||||
crate::volume::remove_install_backup(id).await.log_err();
|
||||
return Ok(Some(service));
|
||||
}
|
||||
}
|
||||
}
|
||||
let s9pk = S9pk::open(s9pk_path, Some(id)).await?;
|
||||
ctx.db
|
||||
.mutate({
|
||||
|db| {
|
||||
db.as_public_mut()
|
||||
.as_package_data_mut()
|
||||
.as_idx_mut(id)
|
||||
.or_not_found(id)?
|
||||
.as_state_info_mut()
|
||||
.map_mutate(|s| {
|
||||
if let PackageState::Updating(UpdatingState {
|
||||
manifest, ..
|
||||
}) = s
|
||||
{
|
||||
Ok(PackageState::Installed(InstalledState { manifest }))
|
||||
} else {
|
||||
Err(Error::new(
|
||||
eyre!("{}", t!("service.mod.race-condition-detected")),
|
||||
ErrorKind::Database,
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
.await
|
||||
.result?;
|
||||
handle_installed(s9pk).await
|
||||
match async {
|
||||
let s9pk = S9pk::open(s9pk_path, Some(id)).await?;
|
||||
ctx.db
|
||||
.mutate({
|
||||
|db| {
|
||||
db.as_public_mut()
|
||||
.as_package_data_mut()
|
||||
.as_idx_mut(id)
|
||||
.or_not_found(id)?
|
||||
.as_state_info_mut()
|
||||
.map_mutate(|s| {
|
||||
if let PackageState::Updating(UpdatingState {
|
||||
manifest,
|
||||
..
|
||||
}) = s
|
||||
{
|
||||
Ok(PackageState::Installed(InstalledState { manifest }))
|
||||
} else {
|
||||
Err(Error::new(
|
||||
eyre!(
|
||||
"{}",
|
||||
t!("service.mod.race-condition-detected")
|
||||
),
|
||||
ErrorKind::Database,
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
.await
|
||||
.result?;
|
||||
handle_installed(s9pk).await
|
||||
}
|
||||
.await
|
||||
{
|
||||
Ok(service) => {
|
||||
crate::volume::remove_install_backup(id).await.log_err();
|
||||
Ok(service)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
"Update rollback failed for {id}, restoring volume snapshot: {e}"
|
||||
);
|
||||
crate::volume::restore_volumes_from_install_backup(id)
|
||||
.await
|
||||
.log_err();
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
PackageStateMatchModelRef::Removing(_) | PackageStateMatchModelRef::Restoring(_) => {
|
||||
if let Ok(s9pk) = S9pk::open(s9pk_path, Some(id)).await.map_err(|e| {
|
||||
|
||||
@@ -307,6 +307,8 @@ impl ServiceMap {
|
||||
finalization_progress.start();
|
||||
let s9pk = S9pk::open(&installed_path, Some(&id)).await?;
|
||||
let data_version = get_data_version(&id).await?;
|
||||
// Snapshot existing volumes before install/update modifies them
|
||||
crate::volume::snapshot_volumes_for_install(&id).await?;
|
||||
let prev = if let Some(service) = service.take() {
|
||||
ensure_code!(
|
||||
recovery_source.is_none(),
|
||||
@@ -382,6 +384,8 @@ impl ServiceMap {
|
||||
cleanup.await?;
|
||||
}
|
||||
|
||||
crate::volume::remove_install_backup(&id).await.log_err();
|
||||
|
||||
drop(service);
|
||||
|
||||
sync_progress_task.await.map_err(|_| {
|
||||
|
||||
@@ -1,13 +1,19 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::PackageId;
|
||||
pub use crate::VolumeId;
|
||||
use crate::prelude::*;
|
||||
use crate::util::Invoke;
|
||||
use crate::util::VersionString;
|
||||
use crate::DATA_DIR;
|
||||
|
||||
pub const PKG_VOLUME_DIR: &str = "package-data/volumes";
|
||||
pub const BACKUP_DIR: &str = "/media/startos/backups";
|
||||
|
||||
const INSTALL_BACKUP_SUFFIX: &str = ".install-backup";
|
||||
|
||||
pub fn data_dir<P: AsRef<Path>>(datadir: P, pkg_id: &PackageId, volume_id: &VolumeId) -> PathBuf {
|
||||
datadir
|
||||
.as_ref()
|
||||
@@ -33,3 +39,70 @@ pub fn asset_dir<P: AsRef<Path>>(
|
||||
pub fn backup_dir(pkg_id: &PackageId) -> PathBuf {
|
||||
Path::new(BACKUP_DIR).join(pkg_id).join("data")
|
||||
}
|
||||
|
||||
fn pkg_volume_dir(pkg_id: &PackageId) -> PathBuf {
|
||||
Path::new(DATA_DIR).join(PKG_VOLUME_DIR).join(pkg_id)
|
||||
}
|
||||
|
||||
fn install_backup_path(pkg_id: &PackageId) -> PathBuf {
|
||||
Path::new(DATA_DIR)
|
||||
.join(PKG_VOLUME_DIR)
|
||||
.join(format!("{pkg_id}{INSTALL_BACKUP_SUFFIX}"))
|
||||
}
|
||||
|
||||
/// Creates a COW snapshot of the package volume directory before install.
|
||||
/// Uses `cp --reflink=always` so it's instant on btrfs and fails gracefully
|
||||
/// on ext4 (no backup, current behavior preserved).
|
||||
/// Returns `true` if a backup was created, `false` if no data existed or
|
||||
/// the filesystem doesn't support reflinks.
|
||||
pub async fn snapshot_volumes_for_install(pkg_id: &PackageId) -> Result<bool, Error> {
|
||||
let src = pkg_volume_dir(pkg_id);
|
||||
if tokio::fs::metadata(&src).await.is_err() {
|
||||
return Ok(false);
|
||||
}
|
||||
let dst = install_backup_path(pkg_id);
|
||||
// Remove any stale backup from a previous failed attempt
|
||||
crate::util::io::delete_dir(&dst).await?;
|
||||
match Command::new("cp")
|
||||
.arg("-a")
|
||||
.arg("--reflink=always")
|
||||
.arg(&src)
|
||||
.arg(&dst)
|
||||
.invoke(ErrorKind::Filesystem)
|
||||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
tracing::info!("Created install backup for {pkg_id} at {dst:?}");
|
||||
Ok(true)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"Could not create install backup for {pkg_id} \
|
||||
(filesystem may not support reflinks): {e}"
|
||||
);
|
||||
// Clean up partial copy if any
|
||||
crate::util::io::delete_dir(&dst).await?;
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Restores the package volume directory from a COW snapshot after a failed
|
||||
/// install. The current (possibly corrupted) volume dir is deleted first.
|
||||
/// No-op if no backup exists.
|
||||
pub async fn restore_volumes_from_install_backup(pkg_id: &PackageId) -> Result<(), Error> {
|
||||
let backup = install_backup_path(pkg_id);
|
||||
if tokio::fs::metadata(&backup).await.is_err() {
|
||||
return Ok(());
|
||||
}
|
||||
let dst = pkg_volume_dir(pkg_id);
|
||||
crate::util::io::delete_dir(&dst).await?;
|
||||
crate::util::io::rename(&backup, &dst).await?;
|
||||
tracing::info!("Restored volumes from install backup for {pkg_id}");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Removes the install backup after a successful install.
|
||||
pub async fn remove_install_backup(pkg_id: &PackageId) -> Result<(), Error> {
|
||||
crate::util::io::delete_dir(&install_backup_path(pkg_id)).await
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user