diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 127b67ca7..51f4129f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -158,6 +158,13 @@ jobs: # These tests may mutate the system live so we can't run in parallel sudo bootc-integration-tests system-reinstall localhost/bootc --test-threads=1 + # Unified storage case + sudo podman build -t localhost/bootc-unified-storage -f ci/Containerfile.install-unified-storage + sudo podman run --privileged --pid=host localhost/bootc-unified-storage bootc install to-existing-root --stateroot=unified-storage --acknowledge-destructive --skip-fetch-check + # Verify unified storage was activated; composefs/bootc.json is written relative to + # the target physical root (/target bind-mounted to host /), so the file appears at /composefs/bootc.json + sudo test -f /composefs/bootc.json + # And the fsverity case sudo podman run --privileged --pid=host localhost/bootc-fsverity bootc install to-existing-root --stateroot=other \ --acknowledge-destructive --skip-fetch-check diff --git a/Justfile b/Justfile index 6ba80d0c7..800db4c8f 100644 --- a/Justfile +++ b/Justfile @@ -290,12 +290,27 @@ test-tmt-baseconfig baseconfig *ARGS: --seal-state={{seal_state}} \ {{base_img}} readonly {{ARGS}} +# Run unified-storage baseconfig test (works with ostree variant, no composefs required) +[group('testing')] +test-tmt-baseconfig-unified-storage *ARGS: + just baseconfigs=unified-storage build + just baseconfigs=unified-storage _build-upgrade-image + cargo xtask run-tmt \ + --env=BOOTC_baseconfigs=unified-storage \ + --upgrade-image={{upgrade_img}} \ + --bootloader={{bootloader}} \ + --filesystem={{filesystem}} \ + --boot-type={{boot_type}} \ + --seal-state={{seal_state}} \ + {{base_img}} readonly {{ARGS}} + # Run readonly tests for all standard baseconfigs [group('testing')] test-baseconfigs *ARGS: just test-tmt-baseconfig etc-transient {{ARGS}} just test-tmt-baseconfig root-transient {{ARGS}} just test-tmt-baseconfig var-volatile {{ARGS}} + just test-tmt-baseconfig-unified-storage {{ARGS}} # Run tmt tests on Fedora CoreOS [group('testing')] diff --git a/ci/Containerfile.install-unified-storage b/ci/Containerfile.install-unified-storage new file mode 100644 index 000000000..556151421 --- /dev/null +++ b/ci/Containerfile.install-unified-storage @@ -0,0 +1,8 @@ +# Enable unified storage (composefs+ostree) at install time via image-embedded config +FROM localhost/bootc-install +RUN < /usr/lib/bootc/install/00-storage.toml +bootc container lint +EORUN diff --git a/contrib/packaging/inject-baseconfig b/contrib/packaging/inject-baseconfig index 041810a8b..6396f5447 100755 --- a/contrib/packaging/inject-baseconfig +++ b/contrib/packaging/inject-baseconfig @@ -11,21 +11,8 @@ if [ -z "${BASECONFIGS}" ]; then exit 0 fi -# setup-root-conf.toml is composefs-specific; ostree uses prepare-root.conf -# which has a different (INI) format and different option names. -case "${VARIANT}" in - composefs*) - TARGET="/usr/lib/composefs/setup-root-conf.toml" - ;; - *) - echo "inject-baseconfig: baseconfigs not supported for variant '${VARIANT}'" >&2 - exit 1 - ;; -esac - -mkdir -p "$(dirname "${TARGET}")" - -# Split on commas and process each token +# Split and process tokens; unified-storage is handled before the variant check +# because it is backend-independent (works with both ostree and composefs). IFS=',' read -ra TOKENS <<< "${BASECONFIGS}" for raw_token in "${TOKENS[@]}"; do # Trim leading/trailing spaces @@ -35,27 +22,50 @@ for raw_token in "${TOKENS[@]}"; do [ -z "${token}" ] && continue case "${token}" in - etc-transient) - printf '[etc]\ntransient = true\n' >> "${TARGET}" - ;; - root-transient) - printf '[root]\ntransient = true\n' >> "${TARGET}" - ;; - var-volatile) - # Mount /var as a fresh tmpfs on every boot via systemd.volatile=state. - # bootc-root-setup detects this karg in the initramfs and automatically - # skips the /var state bind-mount, leaving /var as an empty directory - # from the composefs image. systemd-fstab-generator then mounts a fresh - # tmpfs there at local-fs.target. Using a plain tmpfs avoids the - # overlayfs-on-overlayfs restriction that breaks tools like podman which - # use overlayfs under /var/lib/containers. - mkdir -p /usr/lib/bootc/kargs.d - printf 'kargs = ["systemd.volatile=state"]\n' \ - > /usr/lib/bootc/kargs.d/50-var-volatile.toml + unified-storage) + # Write the bootc install config to enable unified storage at install time. + # This is backend-independent and works with both ostree and composefs variants. + mkdir -p /usr/lib/bootc/install + printf '[install.storage]\nunified = "enabled-with-copy"\n' \ + > /usr/lib/bootc/install/00-storage.toml ;; *) - echo "Unknown baseconfig: ${token}" >&2 - exit 1 + # All other tokens require the composefs variant (they write to composefs-specific paths). + # Validate variant here, not at the top, so unified-storage can run on any variant. + case "${VARIANT}" in + composefs*) + TARGET="/usr/lib/composefs/setup-root-conf.toml" + mkdir -p "$(dirname "${TARGET}")" + ;; + *) + echo "inject-baseconfig: baseconfig '${token}' not supported for variant '${VARIANT}'" >&2 + exit 1 + ;; + esac + case "${token}" in + etc-transient) + printf '[etc]\ntransient = true\n' >> "${TARGET}" + ;; + root-transient) + printf '[root]\ntransient = true\n' >> "${TARGET}" + ;; + var-volatile) + # Mount /var as a fresh tmpfs on every boot via systemd.volatile=state. + # bootc-root-setup detects this karg in the initramfs and automatically + # skips the /var state bind-mount, leaving /var as an empty directory + # from the composefs image. systemd-fstab-generator then mounts a fresh + # tmpfs there at local-fs.target. Using a plain tmpfs avoids the + # overlayfs-on-overlayfs restriction that breaks tools like podman which + # use overlayfs under /var/lib/containers. + mkdir -p /usr/lib/bootc/kargs.d + printf 'kargs = ["systemd.volatile=state"]\n' \ + > /usr/lib/bootc/kargs.d/50-var-volatile.toml + ;; + *) + echo "Unknown baseconfig: ${token}" >&2 + exit 1 + ;; + esac ;; esac done diff --git a/crates/lib/src/bootc_composefs/export.rs b/crates/lib/src/bootc_composefs/export.rs index ea760e603..6fe774ff4 100644 --- a/crates/lib/src/bootc_composefs/export.rs +++ b/crates/lib/src/bootc_composefs/export.rs @@ -7,44 +7,27 @@ use composefs_ctl::composefs; use composefs_ctl::composefs_oci; use composefs_oci::open_config; use ocidir::{OciDir, oci_spec::image::Platform}; +use ostree_ext::container::ImageReference; use ostree_ext::container::Transport; use ostree_ext::container::skopeo; use tar::EntryType; use crate::image::get_imgrefs_for_copy; use crate::{ - bootc_composefs::status::{get_composefs_status, get_imginfo}, - store::{BootedComposefs, Storage}, + bootc_composefs::status::{ImgConfigManifest, get_composefs_status, get_imginfo}, + store::{BootedComposefs, ComposefsRepository, Storage}, }; -/// Exports a composefs repository to a container image in containers-storage: -pub async fn export_repo_to_image( - storage: &Storage, - booted_cfs: &BootedComposefs, - source: Option<&str>, - target: Option<&str>, +/// Streams a composefs OCI image out to a destination image reference. +/// +/// Given a composefs repository handle and image metadata (manifest + config), +/// reconstructs the container image by reading layer data from the composefs +/// splitstreams and copies the assembled OCI image to `dest_imgref` via skopeo. +pub(crate) async fn export_composefs_to_dest( + composefs_repo: &ComposefsRepository, + imginfo: &ImgConfigManifest, + dest_imgref: &ImageReference, ) -> Result<()> { - let host = get_composefs_status(storage, booted_cfs).await?; - - let (source, dest_imgref) = get_imgrefs_for_copy(&host, source, target).await?; - - let mut depl_verity = None; - - for depl in host.list_deployments() { - let img = &depl.image.as_ref().unwrap().image; - - // Not checking transport here as we'll be pulling from the repo anyway - // So, image name is all we need - if img.image == source.name { - depl_verity = Some(depl.require_composefs()?.verity.clone()); - break; - } - } - - let depl_verity = depl_verity.ok_or_else(|| anyhow::anyhow!("Image {source} not found"))?; - - let imginfo = get_imginfo(storage, &depl_verity)?; - let config_digest = imginfo.manifest.config().digest().clone(); let var_tmp = @@ -54,7 +37,7 @@ pub async fn export_repo_to_image( let oci_dir = OciDir::ensure(tmpdir.try_clone()?).context("Opening OCI")?; // Use composefs_oci::open_config to get the config and layer map - let open = open_config(&*booted_cfs.repo, &config_digest, None).context("Opening config")?; + let open = open_config(composefs_repo, &config_digest, None).context("Opening config")?; let config = open.config; let layer_map = open.layer_refs; @@ -77,7 +60,7 @@ pub async fn export_repo_to_image( .get(old_diff_id.as_str()) .ok_or_else(|| anyhow::anyhow!("Layer {old_diff_id} not found in config"))?; - let mut layer_stream = booted_cfs.repo.open_stream("", Some(layer_verity), None)?; + let mut layer_stream = composefs_repo.open_stream("", Some(layer_verity), None)?; let mut layer_writer = oci_dir.create_layer(None)?; layer_writer.follow_symlinks(false); @@ -113,7 +96,7 @@ pub async fn export_repo_to_image( match layer_stream.read_exact(size as usize, ((size as usize) + 511) & !511)? { SplitStreamData::External(obj_id) => match header.entry_type() { EntryType::Regular | EntryType::Continuous => { - let file = File::from(booted_cfs.repo.open_object(&obj_id)?); + let file = File::from(composefs_repo.open_object(&obj_id)?); layer_writer .append(&header, file) @@ -196,7 +179,7 @@ pub async fn export_repo_to_image( skopeo::copy( &tempoci, - &dest_imgref, + dest_imgref, None, Some(( std::sync::Arc::new(tmpdir.try_clone()?.into()), @@ -208,3 +191,37 @@ pub async fn export_repo_to_image( Ok(()) } + +/// Exports a composefs repository to a container image in containers-storage: +pub async fn export_repo_to_image( + storage: &Storage, + booted_cfs: &BootedComposefs, + source: Option<&str>, + target: Option<&str>, +) -> Result<()> { + let host = get_composefs_status(storage, booted_cfs).await?; + + let (source, dest_imgref) = get_imgrefs_for_copy(&host, source, target).await?; + + let mut depl_verity = None; + + for depl in host.list_deployments() { + let img = &depl.image.as_ref().unwrap().image; + + // Not checking transport here as we'll be pulling from the repo anyway + // So, image name is all we need + if img.image == source.name { + depl_verity = Some(depl.require_composefs()?.verity.clone()); + break; + } + } + + let depl_verity = depl_verity.ok_or_else(|| anyhow::anyhow!("Image {source} not found"))?; + + let imginfo = get_imginfo(storage, &depl_verity)?; + + println!("Copying local image {source} to {dest_imgref} ..."); + export_composefs_to_dest(&booted_cfs.repo, &imginfo, &dest_imgref).await?; + println!("Pushed: {dest_imgref}"); + Ok(()) +} diff --git a/crates/lib/src/bootc_composefs/repo.rs b/crates/lib/src/bootc_composefs/repo.rs index 1f59fb8d7..3e4d36df9 100644 --- a/crates/lib/src/bootc_composefs/repo.rs +++ b/crates/lib/src/bootc_composefs/repo.rs @@ -41,6 +41,7 @@ pub(crate) async fn initialize_composefs_repository( root_setup: &RootSetup, allow_missing_fsverity: bool, use_unified: bool, + local_fetch: LocalFetchOpt, ) -> Result> { const COMPOSEFS_REPO_INIT_JOURNAL_ID: &str = "5d4c3b2a1f0e9d8c7b6a5f4e3d2c1b0a9"; @@ -99,7 +100,7 @@ pub(crate) async fn initialize_composefs_repository( let imgstore = CStorage::create(rootfs_dir, &run, sepolicy.as_ref())?; let storage_path = root_setup.physical_root_path.join(CStorage::subpath()); - let r = pull_composefs_unified(&imgstore, storage_path.as_str(), &repo, &imgref).await?; + let r = pull_composefs_unified(&imgstore, storage_path.as_str(), &repo, &imgref, local_fetch).await?; // SELinux-label the containers-storage now that all pulls are done. imgstore @@ -179,16 +180,18 @@ async fn pull_composefs_unified( storage_path: &str, repo: &Arc, imgref: &containers_image_proxy::ImageReference, + local_fetch: LocalFetchOpt, ) -> Result> { let image = &imgref.name; // Stage 1: get the image into bootc-owned containers-storage. if imgref.transport == containers_image_proxy::Transport::ContainerStorage { - // The image is in the default containers-storage (/var/lib/containers/storage). - // Copy it into bootc-owned storage. + // The image is in a containers-storage instance — either the default + // /var/lib/containers/storage or an additional image store advertised + // via STORAGE_OPTS (e.g. the bcvk virtiofs mount). tracing::info!("Unified pull: copying {image} from host containers-storage"); imgstore - .pull_from_host_storage(image) + .pull_from_containers_storage(image) .await .context("Copying image from host containers-storage into bootc storage")?; } else { @@ -210,11 +213,11 @@ async fn pull_composefs_unified( let storage = std::path::Path::new(storage_path); let pull_opts = PullOptions { // The image is already in bootc-owned containers-storage at this point - // (placed there by Stage 1 of the unified pull). Use ZeroCopy so we - // actually import via reflink/hardlink and fail loudly if that isn't - // possible — a plain copy fallback here would mean Stage 1 and Stage 2 - // are on different filesystems or the storage root is wrong. - local_fetch: LocalFetchOpt::ZeroCopy, + // (placed there by Stage 1 of the unified pull). CopyMode controls + // whether a fallback to byte copies is acceptable: + // ZeroCopy → fail if reflinks unavailable (storage.unified = "enabled") + // IfPossible → byte-copy fallback ok (storage.unified = "enabled-with-copy") + local_fetch, storage_root: Some(storage), ..Default::default() }; @@ -240,6 +243,7 @@ pub(crate) async fn pull_composefs_repo( spec_imgref: &crate::spec::ImageReference, allow_missing_fsverity: bool, use_unified: bool, + local_fetch: LocalFetchOpt, ) -> Result { const COMPOSEFS_PULL_JOURNAL_ID: &str = "4c3b2a1f0e9d8c7b6a5f4e3d2c1b0a9f8"; @@ -290,7 +294,7 @@ pub(crate) async fn pull_composefs_repo( let imgstore = CStorage::create(&rootfs_dir, &run, sepolicy.as_ref())?; let storage_path = format!("/sysroot/{}", CStorage::subpath()); - pull_composefs_unified(&imgstore, &storage_path, &repo, &imgref).await? + pull_composefs_unified(&imgstore, &storage_path, &repo, &imgref, local_fetch).await? } else { pull_composefs_direct(&repo, &imgref).await? }; diff --git a/crates/lib/src/bootc_composefs/status.rs b/crates/lib/src/bootc_composefs/status.rs index bd8b7a19c..7ad00c92b 100644 --- a/crates/lib/src/bootc_composefs/status.rs +++ b/crates/lib/src/bootc_composefs/status.rs @@ -970,6 +970,12 @@ async fn composefs_deployment_status_from( host.status.usr_overlay = get_composefs_usr_overlay_status().ok().flatten(); + // Populate storage status from bootc repo metadata stored on the physical root. + host.status.storage = crate::store::BootcRepoMeta::read(&storage.physical_root) + .ok() + .flatten() + .map(|meta| crate::spec::StorageStatus { unified: meta.unified }); + set_soft_reboot_capability(storage, &mut host, sorted_bls_config, cmdline)?; Ok(host) diff --git a/crates/lib/src/bootc_composefs/switch.rs b/crates/lib/src/bootc_composefs/switch.rs index f6b3855be..367ecbf63 100644 --- a/crates/lib/src/bootc_composefs/switch.rs +++ b/crates/lib/src/bootc_composefs/switch.rs @@ -51,23 +51,11 @@ pub(crate) async fn switch_composefs( let repo = &*booted_cfs.repo; let (image, img_config) = is_image_pulled(repo, &target_imgref).await?; - // Use unified storage if explicitly requested, or auto-detect: either the - // target image is already in bootc-owned containers-storage, OR the booted - // image is — which means the user has opted into unified storage and all - // subsequent operations (including switch to a new image) should use it. - let use_unified = if opts.unified_storage_exp { - true - } else { - let booted_imgref = host.spec.image.as_ref(); - let booted_unified = if let Some(booted) = booted_imgref { - crate::deploy::image_exists_in_unified_storage(storage, booted).await? - } else { - false - }; - let target_unified = - crate::deploy::image_exists_in_unified_storage(storage, &target_imgref).await?; - booted_unified || target_unified - }; + // Use unified storage if explicitly requested via flag, or if the + // composefs/bootc.json marker says unified storage is enabled on this system. + let use_unified = opts.unified_storage_exp + || crate::deploy::unified_storage_enabled(storage) + .context("Checking unified storage flag")?; let do_upgrade_opts = DoUpgradeOpts { soft_reboot: opts.soft_reboot, diff --git a/crates/lib/src/bootc_composefs/update.rs b/crates/lib/src/bootc_composefs/update.rs index e88d1f3e9..8380a8fbe 100644 --- a/crates/lib/src/bootc_composefs/update.rs +++ b/crates/lib/src/bootc_composefs/update.rs @@ -260,6 +260,8 @@ pub(crate) async fn do_upgrade( imgref, booted_cfs.cmdline.allow_missing_fsverity, opts.use_unified, + // Runtime upgrade: reflinks were proven at install time; allow copies as fallback. + composefs_ctl::composefs_oci::LocalFetchOpt::IfPossible, ) .await?; @@ -416,17 +418,11 @@ pub(crate) async fn upgrade_composefs( let imgref = derived_image.as_ref().or(current_image); let mut booted_imgref = imgref.ok_or_else(|| anyhow::anyhow!("No image source specified"))?; - // Auto-detect unified storage: use the unified path if the target image is - // already in bootc-owned containers-storage, OR if the booted image is — - // the latter means the user has opted into unified storage and all - // subsequent operations should use it. - let current_unified = if let Some(current) = current_image { - crate::deploy::image_exists_in_unified_storage(storage, current).await? - } else { - false - }; - do_upgrade_opts.use_unified = current_unified - || crate::deploy::image_exists_in_unified_storage(storage, booted_imgref).await?; + // Use unified storage if the composefs/bootc.json marker says it is enabled + // on this system (written by `bootc image set-unified` or by install with + // `--experimental-unified-storage`). + do_upgrade_opts.use_unified = crate::deploy::unified_storage_enabled(storage) + .context("Checking unified storage flag")?; let repo = &*composefs.repo; diff --git a/crates/lib/src/cli.rs b/crates/lib/src/cli.rs index 0be914c46..87b9d3e37 100644 --- a/crates/lib/src/cli.rs +++ b/crates/lib/src/cli.rs @@ -587,7 +587,20 @@ pub(crate) enum ImageOpts { /// /// This onboards the system to the unified storage path so that future /// upgrade/switch operations can read from the bootc storage directly. - SetUnified, + /// + /// By default, FICLONE reflinks are required; the command will fail if the + /// underlying filesystem does not support them. Pass `--enabled-with-copy` to + /// allow a byte-copy fallback on filesystems without reflink support. + SetUnified { + /// Allow byte-copy fallback when the filesystem does not support reflinks. + /// + /// When set, the system is onboarded with `storage.unified = "enabled-with-copy"`; + /// unified storage is still active but block sharing between the composefs + /// and ostree repos is not guaranteed. Without this flag the command + /// fails hard if FICLONE is unavailable. + #[clap(long)] + enabled_with_copy: bool, + }, /// Copy a container image from the default `containers-storage:` to the bootc-owned container storage. PullFromDefaultStorage { /// The image to pull @@ -619,6 +632,28 @@ pub(crate) enum FsverityOpts { }, } +/// Subcommands for `bootc internals fsck`. +#[derive(Debug, clap::Subcommand, PartialEq, Eq)] +pub(crate) enum FsckCheck { + /// Check image store metadata consistency. + /// + /// Verifies that every image bootc has committed to (i.e. has a composefs + /// GC tag) is also present in bootc's containers-storage. This is a + /// metadata-level check only — it does not walk composefs objects or verify + /// content integrity. Exits non-zero if any image is partially imported. + /// + /// LBIs (logically bound images) are not subject to this check; only images + /// that went through the unified storage pipeline are checked. + Images { + /// Attempt to repair detected inconsistencies. + #[clap(long)] + repair: bool, + /// Output results as JSON. + #[clap(long)] + json: bool, + }, +} + /// Hidden, internal only options #[derive(Debug, clap::Subcommand, PartialEq, Eq)] pub(crate) enum InternalsOpts { @@ -641,7 +676,13 @@ pub(crate) enum InternalsOpts { #[clap(subcommand)] Fsverity(FsverityOpts), /// Perform consistency checking. - Fsck, + /// + /// Without a subcommand, runs all checks. Use a subcommand to run a + /// specific subset. + Fsck { + #[clap(subcommand)] + check: Option, + }, /// Perform cleanup actions Cleanup, Relabel { @@ -1215,10 +1256,8 @@ async fn upgrade( return Ok(()); } - // Ensure the bootc storage directory is initialized; the --check path - // needs this for update_mtime() and the non-check path needs it for - // unified pull detection. - let use_unified = crate::deploy::image_exists_in_unified_storage(storage, imgref).await?; + // Check whether the composefs/bootc.json flag says unified storage is enabled. + let use_unified = crate::deploy::unified_storage_enabled(storage)?; if opts.check { let ostree_imgref = imgref.clone().into(); @@ -1246,16 +1285,8 @@ async fn upgrade( } } else { let fetched = if use_unified { - crate::deploy::pull_unified( - repo, - imgref, - None, - opts.quiet, - prog.clone(), - storage, - Some(&booted_ostree.deployment), - ) - .await? + // Runtime upgrade: reflinks were proven at install time; allow copies as fallback. + crate::deploy::pull_via_composefs(repo, imgref, storage, composefs_ctl::composefs_oci::LocalFetchOpt::IfPossible).await? } else { crate::deploy::pull( repo, @@ -1417,24 +1448,13 @@ async fn switch_ostree( // Determine whether to use unified storage path. // If explicitly requested via flag, use unified storage directly. - // Otherwise, auto-detect based on whether the image exists in bootc storage. - let use_unified = if opts.unified_storage_exp { - true - } else { - crate::deploy::image_exists_in_unified_storage(storage, &target).await? - }; + // Otherwise, check the composefs/bootc.json flag. + let use_unified = opts.unified_storage_exp + || crate::deploy::unified_storage_enabled(storage)?; let fetched = if use_unified { - crate::deploy::pull_unified( - repo, - &target, - None, - opts.quiet, - prog.clone(), - storage, - Some(&booted_ostree.deployment), - ) - .await? + // Runtime switch: reflinks were proven at install time; allow copies as fallback. + crate::deploy::pull_via_composefs(repo, &target, storage, composefs_ctl::composefs_oci::LocalFetchOpt::IfPossible).await? } else { crate::deploy::pull( repo, @@ -1972,7 +1992,9 @@ async fn run_from_opt(opt: Opt) -> Result<()> { } } } - ImageOpts::SetUnified => crate::image::set_unified_entrypoint().await, + ImageOpts::SetUnified { enabled_with_copy } => { + crate::image::set_unified_entrypoint(enabled_with_copy).await + } ImageOpts::PullFromDefaultStorage { image } => { let storage = get_storage().await?; storage @@ -2092,11 +2114,45 @@ async fn run_from_opt(opt: Opt) -> Result<()> { Ok(()) } }, - InternalsOpts::Cfs { args } => composefs_ctl::run_from_iter(args.iter()).await, + InternalsOpts::Cfs { args } => { + // Inject `--system` (which resolves to /sysroot/composefs) unless the + // caller has already specified a repo location via --repo, --user, or + // --system. This ensures `bootc internals cfsctl oci images` (and + // similar subcommands) operate on the booted composefs repository by + // default rather than falling back to cfsctl's own heuristic. + let has_repo_flag = args.iter().any(|a| { + let s = a.to_string_lossy(); + s == "--repo" || s.starts_with("--repo=") || s == "--user" || s == "--system" + }); + if has_repo_flag { + composefs_ctl::run_from_iter(args.iter()).await + } else { + composefs_ctl::run_from_iter( + std::iter::once(OsString::from("--system")).chain(args.into_iter()), + ) + .await + } + } InternalsOpts::Reboot => crate::reboot::reboot(), - InternalsOpts::Fsck => { + InternalsOpts::Fsck { check } => { let storage = &get_storage().await?; - crate::fsck::fsck(&storage, std::io::stdout().lock()).await?; + match check { + // `bootc internals fsck images [--repair] [--json]` + Some(FsckCheck::Images { repair, json }) => { + let ok = crate::fsck::fsck_images(storage, repair, json).await?; + if !ok { + std::process::exit(1); + } + } + // `bootc internals fsck` with no subcommand — run everything. + None => { + crate::fsck::fsck(storage, std::io::stdout().lock()).await?; + let ok = crate::fsck::fsck_images(storage, false, false).await?; + if !ok { + std::process::exit(1); + } + } + } Ok(()) } InternalsOpts::FixupEtcFstab => crate::deploy::fixup_etc_fstab(&root), diff --git a/crates/lib/src/composefs_consts.rs b/crates/lib/src/composefs_consts.rs index 8617f1005..c0d52fe27 100644 --- a/crates/lib/src/composefs_consts.rs +++ b/crates/lib/src/composefs_consts.rs @@ -53,4 +53,17 @@ pub(crate) const UKI_NAME_PREFIX: &str = TYPE1_BOOT_DIR_PREFIX; /// Tags are created as `localhost/bootc-` to act as GC roots /// that keep the manifest, config, and layer splitstreams alive. This is /// analogous to how ostree uses `ostree/` refs. +/// +/// Note: the `localhost/bootc-` prefix is a bootc internal convention and has +/// nothing to do with the image's actual registry name. A production image from +/// `quay.io/myorg/myos:latest` still gets tagged `localhost/bootc-sha256:...`. +/// +/// # Future work +/// +/// When unified storage expands to cover application container images (not just +/// the OS deployment image), we will need a richer tag namespace to distinguish +/// image kinds — e.g. `bootc:os/` vs `bootc:app/`. At that point +/// we will also need to ensure that GC only prunes tags owned by bootc, and leaves +/// refs created by other tools (e.g. cfsctl, or a future container runtime) intact. +/// For now `localhost/bootc-` is sufficient and its on-disk format is stable. pub(crate) const BOOTC_TAG_PREFIX: &str = "localhost/bootc-"; diff --git a/crates/lib/src/deploy.rs b/crates/lib/src/deploy.rs index e82314629..76eebb9bf 100644 --- a/crates/lib/src/deploy.rs +++ b/crates/lib/src/deploy.rs @@ -5,11 +5,9 @@ use std::collections::HashSet; use std::io::{BufRead, Write}; use std::os::fd::AsFd; -use std::process::Command; use anyhow::{Context, Result, anyhow}; use bootc_kernel_cmdline::utf8::CmdlineOwned; -use bootc_utils::skopeo_bin; use cap_std::fs::{Dir, MetadataExt}; use cap_std_ext::cap_std; use cap_std_ext::dirext::CapStdExtDirExt; @@ -111,23 +109,6 @@ pub(crate) async fn new_importer( Ok(imp) } -/// Wrapper for pulling a container image with a custom proxy config (e.g. for unified storage). -pub(crate) async fn new_importer_with_config( - repo: &ostree::Repo, - imgref: &ostree_container::OstreeImageReference, - config: ostree_ext::containers_image_proxy::ImageProxyConfig, - booted_deployment: Option<&ostree::Deployment>, -) -> Result { - let mut imp = ostree_container::store::ImageImporter::new(repo, imgref, config).await?; - imp.require_bootable(); - // We do our own GC/prune in deploy::prune(), so skip the importer's internal one. - imp.disable_gc(); - if let Some(deployment) = booted_deployment { - imp.set_sepolicy_commit(deployment.csum().to_string()); - } - Ok(imp) -} - pub(crate) fn check_bootc_label(config: &ostree_ext::oci_spec::image::ImageConfiguration) { if let Some(label) = labels_of_config(config).and_then(|labels| labels.get(crate::metadata::BOOTC_COMPAT_LABEL)) @@ -373,12 +354,59 @@ pub(crate) async fn prune_container_store(sysroot: &Storage) -> Result<()> { }); } } - // Convert to a hashset of just the image names - let image_names = HashSet::from_iter(all_bound_images.iter().map(|img| img.image.as_str())); - let pruned = sysroot - .get_ensure_imgstore()? - .prune_except_roots(&image_names) - .await?; + + let imgstore = sysroot.get_ensure_imgstore()?; + + // Also protect images that have composefs tags — these are managed by the unified + // storage pipeline and must not be pruned even if no live deployment currently + // references them (e.g. after `bootc switch`). The composefs splitstreams depend + // on the cstorage data being present. + // + // We match by config digest (= image ID, stable across layer recompression) rather + // than manifest digest, mirroring how `bootc image list` cross-references the stores. + let mut composefs_protected_names: Vec = Vec::new(); + if let Ok(cfs_repo) = sysroot.get_ensure_composefs() { + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::{self, OciImage}; + if let Ok(refs) = composefs_oci::list_refs(&*cfs_repo) { + // Collect config digests for every composefs-tagged image. + let cfs_config_digests: HashSet = refs + .into_iter() + .filter(|(tag, _)| tag.starts_with(crate::composefs_consts::BOOTC_TAG_PREFIX)) + .filter_map(|(_tag, manifest_digest)| { + OciImage::::open(&*cfs_repo, &manifest_digest, None) + .ok() + .map(|img| img.manifest().config().digest().to_string()) + }) + .collect(); + + if !cfs_config_digests.is_empty() { + // List cstorage images and find those whose ID matches a composefs config digest. + // The cstorage ID is the bare hex string; the composefs digest is "sha256:". + let all_cstor = imgstore.list_images().await?; + for entry in all_cstor { + if cfs_config_digests.contains(&format!("sha256:{}", entry.id)) + || cfs_config_digests.contains(&entry.id) + { + if let Some(names) = entry.names { + composefs_protected_names.extend(names); + } + } + } + tracing::debug!( + "Composefs-protected cstorage image names: {:?}", + composefs_protected_names + ); + } + } + } + + // Build the final set of protected names, borrowing from both owned collections. + let mut image_names = + HashSet::from_iter(all_bound_images.iter().map(|img| img.image.as_str())); + image_names.extend(composefs_protected_names.iter().map(|s| s.as_str())); + + let pruned = imgstore.prune_except_roots(&image_names).await?; tracing::debug!("Pruned images: {}", pruned.len()); Ok(()) } @@ -423,16 +451,6 @@ pub(crate) fn check_disk_space_ostree( ) } -/// Verify there is sufficient disk space to pull an image into the composefs store -/// via the ostree unified-storage path (uses `PreparedImportMeta`). -pub(crate) fn check_disk_space_unified( - cfs: &crate::store::ComposefsRepository, - image_meta: &PreparedImportMeta, - imgref: &ImageReference, -) -> Result<()> { - check_disk_space_inner(cfs.objects_dir()?, image_meta.bytes_to_fetch, 0, imgref) -} - /// Verify there is sufficient disk space to pull an image into the composefs store /// for the native composefs backend (uses a raw `ImageManifest`). pub(crate) fn check_disk_space_composefs( @@ -503,154 +521,179 @@ pub(crate) async fn prepare_for_pull( Ok(PreparedPullResult::Ready(Box::new(prepared_image))) } -/// Check whether the image exists in bootc's unified container storage. +/// Check whether unified base-image storage is enabled on this system. /// -/// This is used for auto-detection: if the image already exists in bootc storage -/// (e.g., from a previous `bootc image set-unified` or LBI pull), we can use -/// the unified storage path for faster imports. +/// Returns `true` iff `composefs/bootc.json` exists and has `unified-storage: true`. +/// This is the authoritative signal written by `bootc image set-unified` (and by +/// `bootc install --experimental-unified-storage`). /// -/// Returns true if the image exists in bootc storage. -pub(crate) async fn image_exists_in_unified_storage( - store: &Storage, - imgref: &ImageReference, -) -> Result { - let imgstore = store.get_ensure_imgstore()?; - let image_ref_str = imgref.to_transport_image()?; - imgstore.exists(&image_ref_str).await +/// If the composefs repository doesn't exist yet, the file is absent and this +/// returns `false` — a single cheap file-open attempt with no side effects. +pub(crate) fn unified_storage_enabled(store: &Storage) -> Result { + Ok(crate::store::BootcRepoMeta::read(&store.physical_root)? + .map(|m| m.unified != crate::spec::UnifiedStorageState::Disabled) + .unwrap_or(false)) } -/// Unified approach: Use bootc's CStorage to pull the image, then prepare from containers-storage. -/// This reuses the same infrastructure as LBIs. -pub(crate) async fn prepare_for_pull_unified( +/// Full unified pipeline: containers-storage → composefs → ostree. +/// +/// Stage 1: Pull the image into bootc-owned containers-storage. +/// Stage 2: Zero-copy import from containers-storage into the composefs OCI repo. +/// Stage 3: Synthesize an ostree commit from the composefs filesystem tree. +/// +/// This is the implementation of `--experimental-unified-storage` for the ostree backend. +pub(crate) async fn pull_via_composefs( repo: &ostree::Repo, imgref: &ImageReference, - target_imgref: Option<&OstreeImageReference>, store: &Storage, - booted_deployment: Option<&ostree::Deployment>, -) -> Result { - // Get or initialize the bootc container storage (same as used for LBIs) + local_fetch: composefs_ctl::composefs_oci::LocalFetchOpt, +) -> Result> { + use composefs_ctl::composefs_oci; + use composefs_ctl::composefs_oci::oci_image::OciImage; + use composefs_ctl::composefs_oci::{LocalFetchOpt, PullOptions, tag_image}; + use ostree_ext::container::composefs_import; + use ostree_ext::container::store::ref_for_image; + + // Stage 1: pull into bootc-owned containers-storage. + // Ensure the floating c/storage is initialized so that libpod's database + // is set up with the correct static_dir before any image operations. + // Without this, `podman image exists` (used in pull_from_containers_storage) + // exits with code 125 (database static_dir mismatch) and the skip-if-present + // optimization incorrectly falls through to pull_from_host_storage. + crate::podstorage::ensure_floating_c_storage_initialized(); let imgstore = store.get_ensure_imgstore()?; - let image_ref_str = imgref.to_transport_image()?; - - // Always pull to ensure we have the latest image, whether from a remote - // registry or a locally rebuilt image tracing::info!( - "Unified pull: pulling from transport '{}' to bootc storage", - &imgref.transport + "Composefs unified pull: staging {} into containers-storage", + imgref.image ); + if imgref.transport == "containers-storage" { + imgstore + .pull_from_containers_storage(&imgref.image) + .await + .context("Copying image from host containers-storage into bootc storage")?; + } else { + imgstore + .pull_with_progress(&image_ref_str) + .await + .context("Pulling image into bootc containers-storage")?; + } - // Pull the image into bootc containers-storage with per-layer - // download progress via the podman native API. - imgstore - .pull_with_progress(&image_ref_str) - .await - .context("Pulling image into bootc containers-storage")?; - - // Now create a containers-storage reference to read from bootc storage - tracing::info!("Unified pull: now importing from containers-storage transport"); - let containers_storage_imgref = ImageReference { - transport: "containers-storage".to_string(), - image: imgref.image.clone(), - signature: imgref.signature.clone(), - }; - let ostree_imgref = OstreeImageReference::from(containers_storage_imgref); - - // Configure the importer to use bootc storage as an additional image store - let mut config = new_proxy_config(); - let mut cmd = Command::new(skopeo_bin()); - // Use the physical path to bootc storage from the Storage struct + // Stage 2: zero-copy import from containers-storage into the composefs OCI repo. + let cfs_repo = store.get_ensure_composefs()?; + let cstor_imgref_str = format!("containers-storage:{}", imgref.image); let storage_path = format!( "{}/{}", store.physical_root_path, crate::podstorage::CStorage::subpath() ); - crate::podstorage::set_additional_image_store(&mut cmd, &storage_path); - config.skopeo_cmd = Some(cmd); + tracing::info!( + "Composefs unified pull: importing {} into composefs repo (zero-copy)", + cstor_imgref_str + ); + let pull_opts = PullOptions { + local_fetch, + storage_root: Some(std::path::Path::new(&storage_path)), + ..Default::default() + }; + let pull_result = composefs_oci::pull(&cfs_repo, &cstor_imgref_str, None, pull_opts) + .await + .context("Importing from containers-storage into composefs repo")?; + + // Tag the manifest as a GC root in the composefs repo. + let tag = + crate::bootc_composefs::repo::bootc_tag_for_manifest(&pull_result.manifest_digest.to_string()); + tag_image(&*cfs_repo, &pull_result.manifest_digest, &tag) + .context("Tagging pulled image as bootc GC root in composefs repo")?; + + // Open the OCI image to retrieve manifest + config for the ostree synthesis. + let oci_image = OciImage::open(&cfs_repo, &pull_result.manifest_digest, None) + .context("Opening OCI image from composefs repo")?; + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .context("OCI image has no config (artifact, not a container image)")?; + + let manifest_digest_str = pull_result.manifest_digest.to_string(); + + // Stage 3: synthesize ostree commit from composefs tree (blocking, CPU-bound). + tracing::info!( + "Composefs unified pull: synthesizing ostree commit from composefs tree (digest {})", + manifest_digest_str + ); + let repo_clone = repo.clone(); + let cfs_repo_clone = std::sync::Arc::clone(&cfs_repo); + let config_digest = pull_result.config_digest.clone(); + let manifest_digest_str2 = manifest_digest_str.clone(); + let ostree_commit = tokio::task::spawn_blocking(move || { + composefs_import::import_from_composefs_repo( + &repo_clone, + &cfs_repo_clone, + &config_digest, + &manifest_digest_str2, + &manifest, + &config, + local_fetch == LocalFetchOpt::ZeroCopy, + ) + }) + .await + .context("join error in composefs→ostree import task")? + .context("Synthesizing ostree commit from composefs tree")?; - // Use the preparation flow with the custom config - let mut imp = new_importer_with_config(repo, &ostree_imgref, config, booted_deployment).await?; - if let Some(target) = target_imgref { - imp.set_target(target); + // Write the ostree ref so the deployment machinery can find the commit. + { + let ostree_imgref = OstreeImageReference::from(imgref.clone()); + let ostree_ref = ref_for_image(&ostree_imgref.imgref) + .context("Computing ostree ref for image")?; + let txn = repo + .auto_transaction(gio::Cancellable::NONE) + .context("Beginning ostree transaction for ref write")?; + repo.transaction_set_ref(None, &ostree_ref, Some(ostree_commit.as_str())); + txn.commit(gio::Cancellable::NONE) + .context("Committing ostree ref transaction")?; } - let prep = match imp.prepare().await? { - PrepareResult::AlreadyPresent(c) => { - println!("No changes in {imgref:#} => {}", c.manifest_digest); - return Ok(PreparedPullResult::AlreadyPresent(Box::new((*c).into()))); - } - PrepareResult::Ready(p) => p, - }; - check_bootc_label(&prep.config); - if let Some(warning) = prep.deprecated_warning() { - ostree_ext::cli::print_deprecated_warning(warning).await; + + // Extract version from the config labels. + let version = oci_image + .config() + .and_then(|cfg| ostree_ext::container::version_for_config(cfg)) + .map(|s| s.to_owned()); + + // Parse the manifest digest into the oci_spec::image::Digest type that + // ImageState expects. The string is already in "sha256:..." format. + let manifest_digest: Digest = manifest_digest_str + .parse() + .with_context(|| format!("Parsing manifest digest {manifest_digest_str}"))?; + + // Write composefs/bootc.json to mark this system as unified-storage enabled. + // This ensures pull_auto uses the three-store pipeline on all subsequent + // upgrades, even if the user didn't explicitly run `bootc image set-unified`. + { + crate::store::ensure_composefs_dir(&store.physical_root)?; + let mut meta = crate::store::BootcRepoMeta::read(&store.physical_root)? + .unwrap_or_default(); + meta.version = 1; + meta.unified = if local_fetch == LocalFetchOpt::ZeroCopy { + crate::spec::UnifiedStorageState::Enabled + } else { + crate::spec::UnifiedStorageState::EnabledWithCopy + }; + meta.write(&store.physical_root) + .context("Writing unified-storage flag after composefs pull")?; } - ostree_ext::cli::print_layer_status(&prep); - let layers_to_fetch = prep.layers_to_fetch().collect::>>()?; - // Log that we're importing a new image from containers-storage - const PULLING_NEW_IMAGE_ID: &str = "6d5e4f3a2b1c0d9e8f7a6b5c4d3e2f1a0"; tracing::info!( - message_id = PULLING_NEW_IMAGE_ID, - bootc.image.reference = &imgref.image, - bootc.image.transport = "containers-storage", - bootc.original_transport = &imgref.transport, - bootc.status = "importing_from_storage", - "Importing image from bootc storage: {}", - ostree_imgref + "Composefs unified pull complete: commit {} digest {}", + ostree_commit, + manifest_digest ); - let prepared_image = PreparedImportMeta { - imp, - n_layers_to_fetch: layers_to_fetch.len(), - layers_total: prep.all_layers().count(), - bytes_to_fetch: layers_to_fetch.iter().map(|(l, _)| l.layer.size()).sum(), - bytes_total: prep.all_layers().map(|l| l.layer.size()).sum(), - digest: prep.manifest_digest.clone(), - prep, - }; - - Ok(PreparedPullResult::Ready(Box::new(prepared_image))) -} - -/// Unified pull: Use podman to pull to containers-storage, then read from there -pub(crate) async fn pull_unified( - repo: &ostree::Repo, - imgref: &ImageReference, - target_imgref: Option<&OstreeImageReference>, - quiet: bool, - prog: ProgressWriter, - store: &Storage, - booted_deployment: Option<&ostree::Deployment>, -) -> Result> { - match prepare_for_pull_unified(repo, imgref, target_imgref, store, booted_deployment).await? { - PreparedPullResult::AlreadyPresent(existing) => { - // Log that the image was already present (Debug level since it's not actionable) - const IMAGE_ALREADY_PRESENT_ID: &str = "5c4d3e2f1a0b9c8d7e6f5a4b3c2d1e0f9"; - tracing::debug!( - message_id = IMAGE_ALREADY_PRESENT_ID, - bootc.image.reference = &imgref.image, - bootc.image.transport = &imgref.transport, - bootc.status = "already_present", - "Image already present: {}", - imgref - ); - Ok(existing) - } - PreparedPullResult::Ready(prepared_image_meta) => { - check_disk_space_unified( - store.get_ensure_composefs()?.as_ref(), - &prepared_image_meta, - imgref, - )?; - // To avoid duplicate success logs, pass a containers-storage imgref to the importer - let cs_imgref = ImageReference { - transport: "containers-storage".to_string(), - image: imgref.image.clone(), - signature: imgref.signature.clone(), - }; - pull_from_prepared(&cs_imgref, quiet, prog, *prepared_image_meta).await - } - } + Ok(Box::new(ImageState { + manifest_digest, + version, + ostree_commit, + })) } #[context("Pulling")] @@ -779,6 +822,108 @@ pub(crate) async fn wipe_ostree(sysroot: Sysroot) -> Result<()> { Ok(()) } +/// Prune composefs objects no longer referenced by any live deployment. +/// +/// This is a no-op if unified storage is not enabled or the composefs +/// repository doesn't exist yet. +/// +/// On an ostree+unified-storage system every `bootc upgrade` tags the newly +/// pulled manifest in the composefs repo (`localhost/bootc-sha256:`). +/// After the ostree prune step those old tags are no longer anchored to any +/// deployment. This function: +/// +/// 1. Collects the manifest digests of all current ostree deployments. +/// 2. Untagges any composefs bootc-tag whose digest is not in that set. +/// 3. Runs `repo.gc()` to drop orphaned splitstream objects. +#[context("Pruning composefs store")] +async fn prune_composefs_store(sysroot: &Storage) -> Result<()> { + if !unified_storage_enabled(sysroot)? { + return Ok(()); + } + let cfs_repo = match sysroot.get_ensure_composefs() { + Ok(r) => r, + Err(e) => { + tracing::debug!("Composefs repo not available, skipping prune: {e}"); + return Ok(()); + } + }; + + let ostree = match sysroot.get_ostree() { + Ok(o) => o, + Err(_) => return Ok(()), + }; + let repo = ostree.repo(); + + // Collect manifest digests from all current ostree deployments. + // Deployments pulled via pull_via_composefs store META_MANIFEST_DIGEST + // (`ostree.manifest-digest`) in their commit metadata. + let mut live_digests: std::collections::HashSet = + std::collections::HashSet::new(); + for deployment in ostree.deployments() { + let commit_str = deployment.csum(); + match repo.load_commit(commit_str.as_str()) { + Ok((commitv, _)) => { + match ostree_ext::container::store::manifest_digest_from_commit(&commitv) { + Ok(digest) => { + live_digests.insert(digest.to_string()); + } + Err(e) => { + // Not every deployment was pulled via composefs; skip gracefully. + tracing::debug!( + "No manifest digest in commit {commit_str}: {e}" + ); + } + } + } + Err(e) => { + tracing::warn!("Failed to load commit {commit_str}: {e}"); + } + } + } + + // The composefs repository API is synchronous; move the work off the + // async executor thread. + let cfs_repo = std::sync::Arc::clone(&cfs_repo); + tokio::task::spawn_blocking(move || -> Result<()> { + use composefs_ctl::composefs_oci; + use crate::composefs_consts::BOOTC_TAG_PREFIX; + + let all_tags = composefs_oci::list_refs(&*cfs_repo) + .context("Listing composefs OCI refs")?; + + let mut untagged = 0usize; + for (tag_name, _manifest_digest) in &all_tags { + if !tag_name.starts_with(BOOTC_TAG_PREFIX) { + // Not a bootc-owned tag; leave it for the user / other tools. + continue; + } + // The tag is `localhost/bootc-sha256:`. Strip the prefix to + // recover the canonical digest string (`sha256:`). + let digest_str = tag_name + .strip_prefix(BOOTC_TAG_PREFIX) + .expect("checked above"); + if !live_digests.contains(digest_str) { + tracing::debug!("Removing unreferenced composefs bootc tag: {tag_name}"); + composefs_oci::untag_image(&*cfs_repo, tag_name) + .with_context(|| format!("Removing composefs tag {tag_name}"))?; + untagged += 1; + } + } + + if untagged > 0 { + tracing::info!("Removed {untagged} unreferenced composefs tag(s); running GC"); + let gc_result = cfs_repo.gc(&[]).context("Running composefs GC")?; + tracing::debug!("Composefs GC result: {:?}", gc_result); + } else { + tracing::debug!("No unreferenced composefs tags; skipping GC"); + } + + Ok(()) + }) + .await + .context("composefs prune task join error")? +} + pub(crate) async fn cleanup(sysroot: &Storage) -> Result<()> { // Log the cleanup operation to systemd journal const CLEANUP_JOURNAL_ID: &str = "2f1a0b9c8d7e6f5a4b3c2d1e0f9a8b7c6"; @@ -840,6 +985,13 @@ pub(crate) async fn cleanup(sysroot: &Storage) -> Result<()> { // We run these in parallel mostly because we can. tokio::try_join!(repo_prune, bound_prune)?; + + // After ostree prune, clean up any stale composefs tags and GC orphaned + // splitstream objects on unified-storage systems. + prune_composefs_store(sysroot) + .await + .context("Pruning composefs store")?; + Ok(()) } diff --git a/crates/lib/src/fsck.rs b/crates/lib/src/fsck.rs index 25e95e33c..19397e735 100644 --- a/crates/lib/src/fsck.rs +++ b/crates/lib/src/fsck.rs @@ -268,6 +268,15 @@ async fn check_fsverity_inner(storage: &Storage) -> FsckResult { fsck_err(err) } +/// Check image store consistency. Delegates to [`crate::image::fsck_images`]. +/// +/// Returns `true` if all checks passed, `false` if any inconsistency was found. +/// When `repair` is `true`, attempts to restore images that are in composefs +/// but missing from containers-storage. +pub(crate) async fn fsck_images(storage: &Storage, repair: bool, json: bool) -> anyhow::Result { + crate::image::fsck_images(storage, repair, json).await +} + pub(crate) async fn fsck(storage: &Storage, mut output: impl std::io::Write) -> anyhow::Result<()> { let mut checks = FSCK_CHECKS.static_slice().iter().collect::>(); checks.sort_by(|a, b| a.ordering.cmp(&b.ordering)); diff --git a/crates/lib/src/image.rs b/crates/lib/src/image.rs index 8a6b17a54..c4149bc20 100644 --- a/crates/lib/src/image.rs +++ b/crates/lib/src/image.rs @@ -7,13 +7,16 @@ use bootc_utils::CommandRunExt; use cap_std_ext::cap_std::{self, fs::Dir}; use clap::ValueEnum; use comfy_table::{Table, presets::NOTHING}; +use composefs_ctl::composefs_oci; use fn_error_context::context; use ostree_ext::container::{ImageReference, Transport}; -use serde::Serialize; +use serde::{Serialize, Deserialize}; use crate::{ boundimage::query_bound_images, + bootc_composefs::status::ImgConfigManifest, cli::{ImageListFormat, ImageListType}, + composefs_consts::BOOTC_TAG_PREFIX, podstorage::CStorage, spec::Host, store::Storage, @@ -40,6 +43,9 @@ async fn image_exists_in_host_storage(image: &str) -> Result { enum ImageListTypeColumn { Host, Unified, + /// Image is in bootc containers-storage but composefs import is incomplete + /// or failed. Re-run `bootc upgrade` to complete the import. + Partial, Logical, } @@ -89,17 +95,88 @@ async fn list_host_images_composefs(sysroot: &crate::store::Storage) -> Result = + match sysroot.get_ensure_composefs() { + Ok(repo) => { + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::OciImage; + composefs_oci::list_refs(&*repo) + .context("Listing composefs OCI refs")? + .into_iter() + .filter(|(tag, _)| tag.starts_with(BOOTC_TAG_PREFIX)) + .filter_map(|(_tag, manifest_digest)| { + OciImage::::open(&repo, &manifest_digest, None) + .ok() + .map(|img| img.manifest().config().digest().to_string()) + }) + .collect() + } + Err(_) => Default::default(), + }; + + // Logically bound images (LBIs) are stored in bootc's containers-storage + // but are not OS images — they are never imported into the composefs OCI + // repo via the three-store pipeline. Showing them as "partial" would be + // misleading; they should remain "unified" (available to the system). + let rootfs = cap_std_ext::cap_std::fs::Dir::open_ambient_dir( + "/", + cap_std_ext::cap_std::ambient_authority(), + ) + .context("Opening rootfs")?; + let lbi_names: std::collections::HashSet = crate::boundimage::query_bound_images(&rootfs) + .unwrap_or_default() + .into_iter() + .map(|b| b.image) + .collect(); + + Ok(images + .into_iter() + .flat_map(|entry| { + let names = entry.names.unwrap_or_default(); + let image_type = if { + let id = &entry.id; + composefs_config_digests.contains(&format!("sha256:{id}")) + || composefs_config_digests.contains(id.as_str()) + } { + ImageListTypeColumn::Unified + } else if names.iter().any(|n| lbi_names.contains(n.as_str())) { + // Logically bound image — not a host OS image, stays unified. + ImageListTypeColumn::Unified + } else { + // In bootc storage but not composefs-tagged and not an LBI — + // the host image is present in cstorage but composefs import + // is not yet complete. + ImageListTypeColumn::Partial + }; + names.into_iter().map(move |name| ImageOutput { + image: name, + image_type: image_type.clone(), + }) }) .collect()) } @@ -175,6 +252,411 @@ pub(crate) async fn list_entrypoint( Ok(()) } +/// Collect the set of OCI manifest digests referenced by all live composefs-native +/// deployments (booted, rollback, staged). +/// +/// Each live deployment has a state-dir origin file that records the manifest +/// digest of its image. Images whose manifest is NOT in this set are stale +/// GC roots that will be cleaned up by `bootc internals composefs-gc`; they +/// should not be treated as fsck failures even if they are absent from +/// containers-storage (the rollback image is the canonical example: after a +/// `bootc switch`, `prune_container_store` may have removed it). +/// +/// Returns an empty set on ostree-backed systems (no `state/deploy/` directory). +fn collect_live_deployment_manifest_digests(sysroot: &Storage) -> std::collections::HashSet { + use crate::bootc_composefs::state::read_origin; + use crate::composefs_consts::{ORIGIN_KEY_IMAGE, ORIGIN_KEY_MANIFEST_DIGEST, STATE_DIR_RELATIVE}; + + let mut digests = std::collections::HashSet::new(); + let Ok(state_base) = sysroot.physical_root.open_dir(STATE_DIR_RELATIVE) else { + // No composefs state dir — not a composefs system or no deployments yet. + return digests; + }; + let entries = match state_base.entries_utf8() { + Ok(e) => e, + Err(_) => return digests, + }; + for entry in entries.flatten() { + let Ok(ft) = entry.file_type() else { continue }; + if !ft.is_dir() { + continue; + } + let Ok(verity) = entry.file_name() else { continue }; + match read_origin(&sysroot.physical_root, &verity) { + Ok(Some(ini)) => { + if let Some(digest) = + ini.get::(ORIGIN_KEY_IMAGE, ORIGIN_KEY_MANIFEST_DIGEST) + { + digests.insert(digest); + } + } + Ok(None) => {} // State dir exists but no origin yet (interrupted deploy). + Err(e) => { + tracing::warn!("Failed to read origin for deployment {verity}: {e:#}"); + } + } + } + digests +} + +/// JSON-serializable report from `bootc internals fsck images`. +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(crate) struct FsckReport { + /// Whether unified storage is enabled on this system. + pub unified_storage_enabled: bool, + /// Whether the filesystem supports reflinks (FICLONE). `None` if the probe failed. + pub reflinks_supported: Option, + /// Per-image check results for live unified-storage images. + pub images: Vec, + /// Overall pass/fail: true if all checks passed. + pub ok: bool, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub(crate) struct FsckImageResult { + /// Human-readable image name (from OCI label or composefs tag). + pub name: String, + /// Whether this image is present in the composefs repo. + pub in_composefs: bool, + /// Whether this image is present in bootc's containers-storage. + pub in_containers_storage: bool, + /// Whether a repair was attempted (only when --repair is passed). + pub repaired: bool, +} + +/// Check image store consistency across containers-storage and composefs. +/// +/// Enumerates all bootc-tagged images in the composefs OCI repo and verifies +/// that each is also present in bootc's containers-storage. Returns `true` +/// if all checks pass, `false` if any inconsistency is found. +/// +/// When `repair` is `true`, images missing from containers-storage are +/// restored by re-importing from the composefs repo. When `json` is `true`, +/// emits a machine-readable [`FsckReport`] instead of human-readable output. +/// +/// When unified storage is not enabled (no `bootc.json` or `unified = "disabled"`), +/// prints a note and returns `true` — nothing to check. +pub(crate) async fn fsck_images(sysroot: &Storage, repair: bool, json: bool) -> anyhow::Result { + // Check if unified storage is enabled on this system. + let meta = crate::store::BootcRepoMeta::read(&sysroot.physical_root)?; + let unified_enabled = meta + .map(|m| m.unified != crate::spec::UnifiedStorageState::Disabled) + .unwrap_or(false); + + let reflinks_supported = match sysroot.reflinks_supported() { + Ok(v) => Some(v), + Err(_) => None, + }; + + if !unified_enabled { + if json { + let report = FsckReport { + unified_storage_enabled: false, + reflinks_supported, + images: vec![], + ok: true, + }; + println!("{}", serde_json::to_string_pretty(&report)?); + } else { + println!("bootc fsck images: unified storage not enabled; nothing to check"); + } + return Ok(true); + } + + if !json { + println!("bootc fsck: checking image stores"); + } + + // Warn if the filesystem does not support reflinks — unified storage + // depends on extent sharing between the composefs and ostree repos. + if !json { + match reflinks_supported { + Some(true) => tracing::debug!("Reflink probe: filesystem supports FICLONE"), + Some(false) => { + println!( + "bootc fsck: WARNING: filesystem does not support reflinks (FICLONE); \ + composefs and ostree object stores are NOT sharing blocks" + ); + } + None => {} + } + } + + // Use composefs tags as the authoritative registry: only images that bootc + // explicitly tagged (via pull_via_composefs) are in scope for the check. + // LBIs live in containers-storage but are NOT tagged in composefs — they + // are not subject to this check. + let cfs_repo = match sysroot.get_ensure_composefs() { + Ok(r) => r, + Err(_) => { + if json { + let report = FsckReport { + unified_storage_enabled: true, + reflinks_supported, + images: vec![], + ok: true, + }; + println!("{}", serde_json::to_string_pretty(&report)?); + } else { + println!("bootc fsck: composefs repo not found; nothing to check"); + } + return Ok(true); + } + }; + + let bootc_tags: Vec<(String, composefs_oci::OciDigest)> = + composefs_oci::list_refs(&*cfs_repo) + .context("Listing composefs OCI refs")? + .into_iter() + .filter(|(tag, _)| tag.starts_with(BOOTC_TAG_PREFIX)) + .collect(); + + if bootc_tags.is_empty() { + if json { + let report = FsckReport { + unified_storage_enabled: true, + reflinks_supported, + images: vec![], + ok: true, + }; + println!("{}", serde_json::to_string_pretty(&report)?); + } else { + println!("bootc fsck: no bootc-managed images in composefs; nothing to check"); + } + return Ok(true); + } + + // Build a set of manifest digests for all live deployments (booted, + // rollback, staged). Composefs tags whose manifest is not in this set + // are stale GC roots — they will be removed by the next composefs-gc run. + // It is NOT a fsck failure if such an image is absent from + // containers-storage: the rollback's image is the canonical example, as + // `prune_container_store` may legitimately have removed it after a switch. + // + // On ostree-backed systems `state/deploy/` doesn't exist and the function + // returns an empty set. Additionally, the manifest digest in the + // composefs repo (from `composefs_oci::pull` after a skopeo copy) may + // differ from the digest recorded in the ostree deployment origin due to + // layer re-encoding. So on ostree systems we skip the live-deployment + // filter entirely and treat every bootc-tagged image as live — there are + // no stale GC tags to worry about on those systems yet. + let live_manifest_digests = collect_live_deployment_manifest_digests(sysroot); + let skip_live_filter = live_manifest_digests.is_empty(); + + // Build the set of image IDs (config sha256) present in containers-storage. + // We match by config digest rather than manifest digest because podman may + // report a different manifest digest than composefs when layers are + // recompressed during the copy. The config digest is stable. + let cstor_images = { + let run = cap_std_ext::cap_std::fs::Dir::open_ambient_dir( + "/run", + cap_std_ext::cap_std::ambient_authority(), + ) + .context("Opening /run")?; + let imgstore = crate::podstorage::CStorage::create( + &sysroot.physical_root, + &run, + None, + ) + .context("Opening containers-storage")?; + imgstore + .list_images() + .await + .context("Listing containers-storage images")? + }; + // Map from config-digest (image ID) → names for cross-referencing. + let cstor_id_to_names: std::collections::HashMap> = cstor_images + .into_iter() + .map(|entry| (entry.id, entry.names.unwrap_or_default())) + .collect(); + + let mut n_ok = 0usize; + let mut n_fail = 0usize; + let mut images = Vec::new(); + + for (tag, manifest_digest) in &bootc_tags { + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::OciImage; + + // Get config digest and human-readable name from the composefs OCI image. + let oci_img = OciImage::::open(&cfs_repo, manifest_digest, None).ok(); + let config_digest = oci_img + .as_ref() + .map(|img| img.manifest().config().digest().to_string()); + let display_name = oci_img + .as_ref() + .and_then(|img| { + img.config() + .and_then(|cfg| cfg.config().as_ref()) + .and_then(|c| c.labels().as_ref()) + .and_then(|l| l.get("org.opencontainers.image.ref.name").cloned()) + }) + .unwrap_or_else(|| tag.clone()); + + // Only enforce containers-storage presence for images that back a live + // deployment. Tags for stale images (e.g. a prior rollback that was + // displaced by a second upgrade) will be cleaned up by composefs-gc and + // are harmless to skip here. + // + // On ostree-backed systems `skip_live_filter` is true (no state/deploy + // dir), so all bootc-tagged images are checked. + if !skip_live_filter { + let is_live = live_manifest_digests.contains(manifest_digest.to_string().as_str()); + if !is_live { + tracing::debug!( + "Skipping stale composefs tag {tag} ({manifest_digest}): not a live deployment" + ); + continue; + } + } + + // Check cstorage by config digest (image ID). + let mut in_cstor = if let Some(ref cdig) = config_digest { + // podman IDs are bare hex; digests are "sha256:" + let bare = cdig.strip_prefix("sha256:").unwrap_or(cdig.as_str()); + cstor_id_to_names.contains_key(bare) || cstor_id_to_names.contains_key(cdig.as_str()) + } else { + false + }; + + let mut repaired = false; + + // in_composefs is always true here: we iterated over composefs list_refs + if in_cstor { + if !json { + println!(" [OK] {display_name} (composefs: yes, containers-storage: yes)"); + } + n_ok += 1; + } else if repair { + if !json { + println!(" [REPAIR] Restoring {display_name} to containers-storage..."); + } + repaired = true; + match repair_image_to_containers_storage(&cfs_repo, manifest_digest).await { + Ok(()) => { + in_cstor = true; + if !json { + println!( + " [REPAIRED] Successfully restored {display_name} to containers-storage" + ); + } + n_ok += 1; + } + Err(e) => { + if !json { + println!(" [REPAIR FAILED] {display_name}: {e:#}"); + } + n_fail += 1; + } + } + } else { + if !json { + println!( + " [FAIL] {display_name} - in composefs but not in containers-storage" + ); + println!( + " Hint: re-run with `--repair` to restore, or `bootc upgrade` to re-pull" + ); + } + n_fail += 1; + } + + images.push(FsckImageResult { + name: display_name, + in_composefs: true, + in_containers_storage: in_cstor, + repaired, + }); + } + + let ok = n_fail == 0; + + if json { + let report = FsckReport { + unified_storage_enabled: true, + reflinks_supported, + images, + ok, + }; + println!("{}", serde_json::to_string_pretty(&report)?); + Ok(ok) + } else { + if ok { + println!("bootc fsck: all checks passed ({n_ok} image(s) OK)"); + Ok(true) + } else { + println!("bootc fsck: {n_fail} failure(s) found ({n_ok} OK, {n_fail} FAIL)"); + Ok(false) + } + } +} + +/// Restore an image from the composefs repo into bootc containers-storage. +/// +/// Used by `bootc internals fsck --repair` when an image is present in +/// composefs (has a bootc GC tag) but missing from containers-storage. +/// +/// Opens the OCI manifest and config stored in the composefs splitstreams, +/// then re-exports the image to containers-storage via skopeo. +async fn repair_image_to_containers_storage( + cfs_repo: &crate::store::ComposefsRepository, + manifest_digest: &composefs_oci::OciDigest, +) -> Result<()> { + use composefs_ctl::composefs::fsverity::Sha512HashValue; + use composefs_ctl::composefs_oci::OciImage; + + // Ensure floating containers-storage is set up before we try to write to it. + crate::podstorage::ensure_floating_c_storage_initialized(); + + // Load the manifest and config from the composefs splitstreams. + let oci_image = OciImage::::open(cfs_repo, manifest_digest, None) + .with_context(|| { + format!("Opening OCI image for manifest digest {manifest_digest} from composefs") + })?; + + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .ok_or_else(|| anyhow::anyhow!("OCI image {manifest_digest} has no config (artifact?)"))?; + + let imginfo = ImgConfigManifest { config, manifest }; + + // Derive a containers-storage name from the manifest digest. + // Use the org.opencontainers.image.ref.name label if available, otherwise + // synthesise a name from the digest so the image is addressable. + let image_name = imginfo + .config + .config() + .as_ref() + .and_then(|c| c.labels().as_ref()) + .and_then(|l| l.get("org.opencontainers.image.ref.name")) + .cloned() + .unwrap_or_else(|| { + // Strip the "sha256:" prefix and take the first 12 hex chars as a short id. + let digest_str = manifest_digest.to_string(); + let short = digest_str + .strip_prefix("sha256:") + .unwrap_or(&digest_str) + .get(..12) + .unwrap_or(&digest_str); + format!("localhost/bootc-recovered:{short}") + }); + + let dest_imgref = ImageReference { + transport: Transport::ContainerStorage, + name: image_name, + }; + + crate::bootc_composefs::export::export_composefs_to_dest(cfs_repo, &imginfo, &dest_imgref) + .await + .with_context(|| { + format!("Exporting composefs image {manifest_digest} to containers-storage") + }) +} + /// Returns the source and target ImageReference /// If the source isn't specified, we use booted image /// If the target isn't specified, we push to containers-storage with our default image @@ -235,6 +717,52 @@ pub(crate) async fn push_entrypoint( let ostree = storage.get_ostree()?; let repo = &ostree.repo(); + // Images pulled via the composefs-unified pipeline are stored in the composefs + // repository with a synthesized ostree commit that has no per-layer blob refs. + // Detect this case and fall through to the composefs export path which reads + // directly from the composefs splitstreams instead of ostree blob refs. + let ostree_ref = ostree_ext::container::store::ref_for_image(&source)?; + if let Some(rev) = repo.resolve_rev(&ostree_ref, true)? { + let (commit_obj, _) = repo.load_commit(rev.as_str())?; + let commit_meta = + ostree_ext::ostree::glib::VariantDict::new(Some(&commit_obj.child_value(0))); + let is_composefs_synthesized = commit_meta + .lookup::(ostree_ext::container::store::META_COMPOSEFS_SYNTHESIZED)? + .unwrap_or(false); + + if is_composefs_synthesized { + // The manifest and config are embedded in the commit metadata. + let manifest_str = commit_meta + .lookup::(ostree_ext::container::store::META_MANIFEST)? + .ok_or_else(|| anyhow::anyhow!("Composefs-synthesized commit missing manifest"))?; + let config_str = commit_meta + .lookup::(ostree_ext::container::store::META_CONFIG)? + .ok_or_else(|| { + anyhow::anyhow!("Composefs-synthesized commit missing image config") + })?; + + let manifest: ostree_ext::oci_spec::image::ImageManifest = + serde_json::from_str(&manifest_str) + .context("Deserializing manifest from commit metadata")?; + let config: ostree_ext::oci_spec::image::ImageConfiguration = + serde_json::from_str(&config_str) + .context("Deserializing image config from commit metadata")?; + + let imginfo = crate::bootc_composefs::status::ImgConfigManifest { config, manifest }; + let composefs_repo = storage.get_ensure_composefs()?; + + println!("Copying local image {source} to {target} ..."); + crate::bootc_composefs::export::export_composefs_to_dest( + &composefs_repo, + &imginfo, + &target, + ) + .await?; + println!("Pushed: {target}"); + return Ok(()); + } + } + let mut opts = ostree_ext::container::store::ExportToOCIOpts::default(); opts.progress_to_stdout = true; println!("Copying local image {source} to {target} ..."); @@ -261,18 +789,29 @@ pub(crate) async fn imgcmd_entrypoint( /// /// This onboards the system to unified storage for host images so that /// upgrade/switch can use the unified path automatically when the image is present. +/// +/// `enabled_with_copy`: if true, allow byte-copy fallback when reflinks are unavailable +/// (writes `enabled-with-copy` to bootc.json); if false, fail hard if FICLONE is not +/// supported (writes `enabled`). #[context("Setting unified storage for booted image")] -pub(crate) async fn set_unified_entrypoint() -> Result<()> { +pub(crate) async fn set_unified_entrypoint(enabled_with_copy: bool) -> Result<()> { + use composefs_ctl::composefs_oci::LocalFetchOpt; + let local_fetch = if enabled_with_copy { + LocalFetchOpt::IfPossible + } else { + LocalFetchOpt::ZeroCopy + }; + let storage = crate::cli::get_storage().await?; if let crate::store::BootedStorageKind::Composefs(booted_cfs) = storage.kind()? { - return set_unified_composefs(&storage, &booted_cfs).await; + return set_unified_composefs(&storage, &booted_cfs, local_fetch).await; } // Initialize floating c_storage early - needed for container operations crate::podstorage::ensure_floating_c_storage_initialized(); - set_unified(&storage).await + set_unified(&storage, local_fetch).await } /// Composefs implementation of set_unified: pull the booted image into @@ -282,6 +821,7 @@ pub(crate) async fn set_unified_entrypoint() -> Result<()> { async fn set_unified_composefs( storage: &crate::store::Storage, booted_cfs: &crate::store::BootedComposefs, + local_fetch: composefs_ctl::composefs_oci::LocalFetchOpt, ) -> Result<()> { use crate::bootc_composefs::status::get_composefs_status; @@ -307,52 +847,115 @@ async fn set_unified_composefs( let imgstore = storage.get_ensure_imgstore()?; - // Check if the image is already in bootc storage + // Pull into bootc-owned containers-storage if not already there. let img_transport = imgref.to_transport_image()?; if imgstore.exists(&img_transport).await? { - println!("Image {} is already in bootc storage.", imgref.image); tracing::info!( message_id = SET_UNIFIED_CFS_JOURNAL_ID, - bootc.status = "already_unified", + bootc.status = "already_in_bootc_storage", "Image already present in bootc containers-storage", ); - return Ok(()); - } + } else { + // If the image exists in the host's default containers-storage + // (/var/lib/containers), copy from there (avoids network). + // Otherwise, pull from the original transport. + let image_in_host = image_exists_in_host_storage(&imgref.image).await?; - // Pull into bootc-owned containers-storage. - // If the image exists in the host's default containers-storage - // (/var/lib/containers), copy from there (avoids network). - // Otherwise, pull from the original transport. - let image_in_host = image_exists_in_host_storage(&imgref.image).await?; + if image_in_host { + tracing::info!( + "Image {} found in host containers-storage; copying to bootc storage", + &imgref.image + ); + let image_name = imgref.image.clone(); + let copy_msg = format!("Copying {} to bootc storage", &image_name); + async_task_with_spinner(©_msg, async move { + imgstore.pull_from_host_storage(&image_name).await + }) + .await?; + } else { + let pull_ref = img_transport; + let pull_msg = format!("Pulling {} to bootc storage", &pull_ref); + async_task_with_spinner(&pull_msg, async move { + imgstore.pull_with_progress(&pull_ref).await + }) + .await?; + } - if image_in_host { - tracing::info!( - "Image {} found in host containers-storage; copying to bootc storage", - &imgref.image - ); - let image_name = imgref.image.clone(); - let copy_msg = format!("Copying {} to bootc storage", &image_name); - async_task_with_spinner(©_msg, async move { - imgstore.pull_from_host_storage(&image_name).await - }) - .await?; - } else { - let pull_ref = img_transport; - let pull_msg = format!("Pulling {} to bootc storage", &pull_ref); - async_task_with_spinner(&pull_msg, async move { - imgstore.pull_with_progress(&pull_ref).await - }) - .await?; + // Verify + let imgstore = storage.get_ensure_imgstore()?; + let img_transport = imgref.to_transport_image()?; + if !imgstore.exists(&img_transport).await? { + anyhow::bail!( + "Image was pulled but not found in bootc storage: {}", + &imgref.image + ); + } } - // Verify - let imgstore = storage.get_ensure_imgstore()?; - let img_transport = imgref.to_transport_image()?; - if !imgstore.exists(&img_transport).await? { - anyhow::bail!( - "Image was pulled but not found in bootc storage: {}", - &imgref.image - ); + // On a composefs-booted system the image is already in the composefs OCI + // repo (it was imported at deploy time), but it may not yet have a bootc + // GC tag. Ensure the tag exists so `bootc internals fsck images` and + // future composefs-gc runs see this image as a live GC root. + { + use crate::bootc_composefs::state::read_origin; + use crate::composefs_consts::{ORIGIN_KEY_IMAGE, ORIGIN_KEY_MANIFEST_DIGEST}; + use composefs_ctl::composefs_oci::tag_image; + + // The booted deployment ID is its fsverity digest (ComposefsCmdline.digest). + let deployment_id = &booted_cfs.cmdline.digest; + + // The manifest digest is stored in the .origin file by pull_via_composefs. + let ini = read_origin(&storage.physical_root, deployment_id) + .context("Reading booted deployment origin for composefs GC tag")?; + if let Some(ini) = ini { + if let Some(manifest_digest_str) = + ini.get::(ORIGIN_KEY_IMAGE, ORIGIN_KEY_MANIFEST_DIGEST) + { + let manifest_digest: composefs_oci::OciDigest = manifest_digest_str + .parse() + .context("Parsing manifest digest from origin")?; + let cfs_repo = storage.get_ensure_composefs()?; + let tag = crate::bootc_composefs::repo::bootc_tag_for_manifest(&manifest_digest_str); + + let existing_refs = composefs_oci::list_refs(&*cfs_repo) + .context("Listing composefs refs")?; + if existing_refs.iter().any(|(t, _)| t == &tag) { + tracing::info!("Bootc GC tag {tag} already exists in composefs repo"); + } else { + tracing::info!( + "Writing bootc GC tag {tag} for booted image in composefs repo" + ); + tag_image(&*cfs_repo, &manifest_digest, &tag) + .context("Tagging booted image as bootc GC root in composefs repo")?; + } + } else { + tracing::warn!( + "No manifest digest in origin for deployment {deployment_id}; \ + bootc GC tag not written (legacy deployment?)" + ); + } + } else { + tracing::warn!( + "No origin file for deployment {deployment_id}; \ + bootc GC tag not written" + ); + } + } + + // Write composefs/bootc.json so pull_via_composefs routes through the + // three-store unified pipeline on all subsequent upgrades/switches. + { + use composefs_ctl::composefs_oci::LocalFetchOpt; + let mut meta = crate::store::BootcRepoMeta::read(&storage.physical_root)? + .unwrap_or_default(); + meta.version = 1; + meta.unified = if local_fetch == LocalFetchOpt::ZeroCopy { + crate::spec::UnifiedStorageState::Enabled + } else { + crate::spec::UnifiedStorageState::EnabledWithCopy + }; + meta.write(&storage.physical_root) + .context("Writing unified-storage flag to composefs/bootc.json")?; } tracing::info!( @@ -366,7 +969,10 @@ async fn set_unified_composefs( /// Inner implementation of set_unified for ostree that accepts a storage reference. #[context("Setting unified storage for booted image")] -pub(crate) async fn set_unified(sysroot: &crate::store::Storage) -> Result<()> { +pub(crate) async fn set_unified( + sysroot: &crate::store::Storage, + local_fetch: composefs_ctl::composefs_oci::LocalFetchOpt, +) -> Result<()> { let ostree = sysroot.get_ostree()?; let repo = &ostree.repo(); @@ -482,6 +1088,25 @@ pub(crate) async fn set_unified(sysroot: &crate::store::Storage) -> Result<()> { }) .await?; } else { + // Image not in host containers-storage and source is a remote registry. + // + // TODO: The ideal path here is `import_from_composefs_repo` — synthesize + // the OCI content directly from the composefs object store into bootc's + // containers-storage with FICLONE reflinks (zero physical copy, same as + // the install-time pipeline). This requires the composefs repo to already + // be populated for this image, which is only true if the system was + // installed with --experimental-unified-storage. For a plain ostree + // system that is onboarding via `set-unified`, the composefs repo is + // empty, so we must re-pull from the registry to populate it correctly. + // + // The ostree→OCI export path (`container::store::export`) is deliberately + // NOT used here because it re-serializes ostree objects through the tar + // pipeline (no reflinks, two full physical copies: ostree→OCI→cstor). + // That would defeat the entire purpose of unified storage. + // + // So for registry-transport systems `set-unified` requires network access. + // Systems installed via containers-storage (e.g. bootc image cmd build) + // always have the image locally, so they take the fast path above. let img_string = imgref.to_transport_image()?; let pull_msg = format!("Pulling {} to bootc storage", &img_string); async_task_with_spinner(&pull_msg, async move { @@ -504,18 +1129,54 @@ pub(crate) async fn set_unified(sysroot: &crate::store::Storage) -> Result<()> { } tracing::info!("Image verified in bootc storage: {}", &imgref.image); - // Optionally verify we can import from containers-storage by preparing in a temp importer - // without actually importing into the main repo; this is a lightweight validation. - let containers_storage_imgref = crate::spec::ImageReference { - transport: "containers-storage".to_string(), - image: imgref.image.clone(), - signature: imgref.signature.clone(), - }; - let ostree_imgref = - ostree_ext::container::OstreeImageReference::from(containers_storage_imgref); - let _ = - ostree_ext::container::store::ImageImporter::new(repo, &ostree_imgref, Default::default()) - .await?; + // Import the image from containers-storage into the composefs OCI repo and tag it + // as a bootc GC root. This mirrors Stage 2 of pull_via_composefs in deploy.rs so + // that `bootc image list` shows the image as fully present (not "partial"). + { + use composefs_ctl::composefs_oci::{PullOptions, tag_image}; + + let cfs_repo = sysroot.get_ensure_composefs()?; + let cstor_imgref_str = format!("containers-storage:{}", imgref.image); + let storage_path = format!( + "{}/{}", + sysroot.physical_root_path, + CStorage::subpath() + ); + tracing::info!( + "Importing {} from containers-storage into composefs repo", + &imgref.image + ); + let pull_opts = PullOptions { + local_fetch, + storage_root: Some(std::path::Path::new(&storage_path)), + ..Default::default() + }; + let pull_result = composefs_oci::pull(&cfs_repo, &cstor_imgref_str, None, pull_opts) + .await + .context("Importing from containers-storage into composefs repo")?; + + let tag = crate::bootc_composefs::repo::bootc_tag_for_manifest( + &pull_result.manifest_digest.to_string(), + ); + tag_image(&*cfs_repo, &pull_result.manifest_digest, &tag) + .context("Tagging pulled image as bootc GC root in composefs repo")?; + } + + // Ensure the composefs directory exists (it may not on a fresh ostree system + // that has never run pull_via_composefs), then write the bootc.json flag. + crate::store::ensure_composefs_dir(&sysroot.physical_root)?; + { + let mut meta = crate::store::BootcRepoMeta::read(&sysroot.physical_root)? + .unwrap_or_default(); + meta.version = 1; + meta.unified = if local_fetch == composefs_ctl::composefs_oci::LocalFetchOpt::ZeroCopy { + crate::spec::UnifiedStorageState::Enabled + } else { + crate::spec::UnifiedStorageState::EnabledWithCopy + }; + meta.write(&sysroot.physical_root) + .context("Writing unified-storage flag to composefs/bootc.json")?; + } tracing::info!( message_id = SET_UNIFIED_JOURNAL_ID, diff --git a/crates/lib/src/install.rs b/crates/lib/src/install.rs index c9fcaf88c..b2762b251 100644 --- a/crates/lib/src/install.rs +++ b/crates/lib/src/install.rs @@ -69,6 +69,16 @@ //! stronger integrity guarantees via fs-verity and supports UKI (Unified Kernel //! Images) for measured boot scenarios. //! +//! ### Unified Storage (Experimental) +//! +//! When [`InstallTargetOpts::unified_storage_exp`] is set (via `--experimental-unified-storage` +//! or the `[install.storage] unified` config key), the ostree backend uses a three-stage +//! pipeline: containers-storage → composefs OCI repo (zero-copy reflinks) → ostree commit +//! synthesis. This keeps the image simultaneously visible in all three stores so that +//! `podman`, the composefs overlay, and ostree all reference the same on-disk data. +//! +//! See [`crate::deploy::pull_via_composefs`] for the implementation. +//! //! ## Discoverable Partitions Specification (DPS) //! //! As of bootc 1.11, partitions are created with DPS type GUIDs from the @@ -113,20 +123,36 @@ //! //! ## Configuration //! -//! Installation is configured via TOML files loaded from multiple paths in -//! systemd-style priority order: +//! Installation is configured via TOML files loaded by [`config::load_config`] +//! (via `liboverdrop::scan`) from the running container's filesystem. The standard +//! location for image authors is `/usr/lib/bootc/install/`. All four conventional +//! bases are scanned in order of increasing priority: //! -//! - `/usr/lib/bootc/install/*.toml` - Distribution/image defaults -//! - `/etc/bootc/install/*.toml` - Local overrides +//! - `/usr/lib/bootc/install/*.toml` +//! - `/usr/local/lib/bootc/install/*.toml` +//! - `/etc/bootc/install/*.toml` +//! - `/run/bootc/install/*.toml` //! -//! Files are merged alphanumerically, with higher-numbered files taking precedence. +//! Files are merged alphanumerically within each directory; later directories override +//! earlier ones for scalar fields, and list fields (e.g. `kargs`) are appended. //! See [`config::InstallConfiguration`] for the schema. //! +//! When `--source-imgref` is used, config is read from the **host** filesystem rather +//! than the target image, since the image has not been fetched yet at config-load time. +//! //! Key configurable options include: //! - Root filesystem type (xfs, ext4, btrfs) //! - Allowed block setups (direct, tpm2-luks) //! - Default kernel arguments //! - Architecture-specific overrides +//! - Unified storage mode (see [`config::UnifiedStorage`]) +//! +//! ## Install Source +//! +//! By default, `bootc install` must run inside a privileged container; it detects the +//! running image via `/run/.containerenv`. When `--source-imgref` is given, installation +//! can be driven from an external image reference (registry, OCI dir, etc.) without running +//! inside a container. //! //! ## Submodules //! @@ -277,8 +303,9 @@ pub(crate) struct InstallTargetOpts { #[serde(default)] pub(crate) run_fetch_check: bool, - /// Verify the image can be fetched from the bootc image. Updates may fail when the installation - /// host is authenticated with the registry but the pull secret is not in the bootc image. + /// Skip the fetch check (inverse of `--run-fetch-check`). When true, the pre-install + /// registry reachability probe is skipped. Useful when the caller has already verified + /// connectivity or when installing in an air-gapped environment. #[clap(long)] #[serde(default)] pub(crate) skip_fetch_check: bool, @@ -604,7 +631,9 @@ pub(crate) struct InstallPrintConfigurationOpts { /// Global state captured from the container. #[derive(Debug, Clone)] pub(crate) struct SourceInfo { - /// Image reference we'll pull from (today always containers-storage: type) + /// Image reference to pull from. In the default (self-install) case this is a + /// `containers-storage:` reference to the running container's image. When + /// `--source-imgref` is given it may be any OCI transport (registry, oci-archive, etc.). pub(crate) imageref: ostree_container::ImageReference, /// The digest to use for pulls pub(crate) digest: Option, @@ -614,7 +643,10 @@ pub(crate) struct SourceInfo { pub(crate) in_host_mountns: bool, } -// Shared read-only global state +/// Shared read-only state built during [`prepare_install`] and used throughout the install. +/// +/// Holds the resolved source image reference, target image reference, install configuration, +/// SELinux state, and various options. Immutable after construction. #[derive(Debug)] pub(crate) struct State { pub(crate) source: SourceInfo, @@ -641,6 +673,10 @@ pub(crate) struct State { // If Some, then --composefs_native is passed pub(crate) composefs_options: InstallComposefsOpts, + + /// Local fetch option for unified storage layer import into the composefs repo. + /// Only meaningful when `target_opts.unified_storage_exp` is true. + pub(crate) unified_storage_local_fetch: composefs_ctl::composefs_oci::LocalFetchOpt, } // Shared read-only global state @@ -1019,6 +1055,12 @@ async fn initialize_ostree_root(state: &State, root_setup: &RootSetup) -> Result Ok((storage, has_ostree)) } +/// Pull the source image and create the initial ostree deployment. +/// +/// This is the core installation step. It fetches the container image (either via the +/// unified storage pipeline or the traditional ostree import path) and synthesizes an +/// ostree deployment from it. The returned deployment and aleph record are used by the +/// caller to write BLS/bootloader entries and finalize the install. #[context("Creating ostree deployment")] async fn install_container( state: &State, @@ -1070,25 +1112,30 @@ async fn install_container( // Auto-detection (None) is only appropriate for upgrade/switch on a running system. let use_unified = state.target_opts.unified_storage_exp; - let prepared = if use_unified { - tracing::info!("Using unified storage path for installation"); - crate::deploy::prepare_for_pull_unified( + let pulled_image = if use_unified { + tracing::info!("Using unified storage path (composefs→ostree) for installation"); + crate::deploy::pull_via_composefs( repo, &spec_imgref, - Some(&state.target_imgref), storage, - None, + state.unified_storage_local_fetch, ) .await? } else { - prepare_for_pull(repo, &spec_imgref, Some(&state.target_imgref), None).await? - }; - - let pulled_image = match prepared { - PreparedPullResult::AlreadyPresent(existing) => existing, - PreparedPullResult::Ready(image_meta) => { - crate::deploy::check_disk_space_ostree(repo, &image_meta, &spec_imgref)?; - pull_from_prepared(&spec_imgref, false, ProgressWriter::default(), *image_meta).await? + let prepared = + prepare_for_pull(repo, &spec_imgref, Some(&state.target_imgref), None).await?; + match prepared { + PreparedPullResult::AlreadyPresent(existing) => existing, + PreparedPullResult::Ready(image_meta) => { + crate::deploy::check_disk_space_ostree(repo, &image_meta, &spec_imgref)?; + pull_from_prepared( + &spec_imgref, + false, + ProgressWriter::default(), + *image_meta, + ) + .await? + } } }; @@ -1208,7 +1255,7 @@ async fn install_container( &root_setup.physical_root, &mut pathbuf, policy, - Some(deployment_root_devino), + &[deployment_root_devino], ) .with_context(|| format!("Recursive SELinux relabeling of {d}"))?; } @@ -1537,7 +1584,12 @@ async fn verify_target_fetch( Ok(()) } -/// Preparation for an install; validates and prepares some (thereafter immutable) global state. +/// Validate options and build the shared [`State`] used throughout the install. +/// +/// Loads install configuration from TOML drop-ins (`/usr/lib/bootc/install/*.toml` and +/// overrides), resolves the install source (either the running container or `--source-imgref`), +/// and merges CLI flags with config-file values. The returned `State` is immutable for the +/// rest of the install. async fn prepare_install( mut config_opts: InstallConfigOpts, source_opts: InstallSourceOpts, @@ -1592,6 +1644,13 @@ async fn prepare_install( // Load install configuration from TOML drop-in files early, so that // config values are available when constructing the target image reference. let install_config = config::load_config()?; + if external_source { + tracing::warn!( + "install config loaded from host filesystem; \ + image-embedded install config (e.g. storage.unified) is not consulted \ + when --source-imgref is used" + ); + } if let Some(ref config) = install_config { tracing::debug!("Loaded install configuration"); // Merge config file values into config_opts (CLI takes precedence) @@ -1612,6 +1671,14 @@ async fn prepare_install( target_opts.enforce_container_sigpolicy = config.enforce_container_sigpolicy.unwrap_or(false); } + + if !target_opts.unified_storage_exp { + if let Some(unified) = config.storage.as_ref().and_then(|s| s.unified) { + if unified.is_enabled() { + target_opts.unified_storage_exp = true; + } + } + } } else { tracing::debug!("No install configuration found"); } @@ -1783,6 +1850,21 @@ async fn prepare_install( .map(|p| std::fs::read_to_string(p).with_context(|| format!("Reading {p}"))) .transpose()?; + // Resolve the local fetch option from the install config. The CLI flag + // (--experimental-unified-storage) maps to Enabled (strict reflinks); the + // config file can select EnabledWithCopy to allow a copy fallback. + let unified_storage_local_fetch = if target_opts.unified_storage_exp { + install_config + .as_ref() + .and_then(|c| c.storage.as_ref()) + .and_then(|s| s.unified) + .map(|u| u.local_fetch_opt()) + // CLI flag with no config file → default to strict (Enabled semantics) + .unwrap_or(composefs_ctl::composefs_oci::LocalFetchOpt::ZeroCopy) + } else { + composefs_ctl::composefs_oci::LocalFetchOpt::IfPossible + }; + // Create our global (read-only) state which gets wrapped in an Arc // so we can pass it to worker threads too. Right now this just // combines our command line options along with some bind mounts from the host. @@ -1800,6 +1882,7 @@ async fn prepare_install( host_is_container, composefs_required, composefs_options, + unified_storage_local_fetch, }); Ok(state) @@ -1987,6 +2070,12 @@ async fn ostree_install(state: &State, rootfs: &RootSetup, cleanup: Cleanup) -> Ok(()) } +/// Core implementation shared by all `bootc install` subcommands. +/// +/// Called after the target filesystem is set up. Handles SELinux, mounts, kargs, +/// bound images, and invokes [`install_container`] to pull and deploy the image. +/// Delegates to either the unified storage pipeline or the traditional ostree path +/// depending on [`State::target_opts`]. async fn install_to_filesystem_impl( state: &State, rootfs: &mut RootSetup, @@ -2042,6 +2131,7 @@ async fn install_to_filesystem_impl( rootfs, state.composefs_options.allow_missing_verity, state.target_opts.unified_storage_exp, + state.unified_storage_local_fetch, ) .await?; @@ -2093,7 +2183,34 @@ async fn install_to_filesystem_impl( if let Some(policy) = state.load_policy()? { tracing::info!("Performing final SELinux relabeling of physical root"); let mut path = Utf8PathBuf::from(""); - crate::lsm::ensure_dir_labeled_recurse(&rootfs.physical_root, &mut path, &policy, None) + // Ostree sets the immutable ext4 attribute on each deployment checkout + // directory, which causes lsetfilecon() to return EPERM. Those dirs + // are already correctly labeled by the earlier composefs import pass, + // so skip each checkout by dev/ino. We enumerate them directly rather + // than skipping the whole ostree/deploy subtree so that the stateroot + // and var directories (which are not immutable) still get labeled. + let mut skip: Vec<(libc::dev_t, libc::ino64_t)> = Vec::new(); + let deploy_dir = "ostree/deploy"; + if let Some(statered_dir) = rootfs.physical_root.open_dir_optional(deploy_dir)? { + for stateroot in statered_dir.entries()? { + let stateroot = stateroot?; + if !stateroot.file_type()?.is_dir() { + continue; + } + let deploy_subdir = stateroot.open_dir()?.open_dir_optional("deploy")?; + if let Some(deploy_subdir) = deploy_subdir { + for checkout in deploy_subdir.entries()? { + let checkout = checkout?; + if !checkout.file_type()?.is_dir() { + continue; + } + let m = checkout.metadata()?; + skip.push((m.dev() as libc::dev_t, m.ino() as libc::ino64_t)); + } + } + } + } + crate::lsm::ensure_dir_labeled_recurse(&rootfs.physical_root, &mut path, &policy, &skip) .context("Final SELinux relabeling of physical root")?; } else { tracing::debug!("Skipping final SELinux relabel (SELinux is disabled)"); @@ -2726,6 +2843,11 @@ pub(crate) async fn install_to_filesystem( Ok(()) } +/// Implementation of the `bootc install to-existing-root` CLI command. +/// +/// Converts the running host (or a mounted filesystem) into a bootc-managed system +/// in-place. Unlike `to-disk`, the caller is responsible for setting up storage; +/// this command installs into the ostree sysroot at the host root. pub(crate) async fn install_to_existing_root(opts: InstallToExistingRootOpts) -> Result<()> { // Log the existing root installation operation to systemd journal const INSTALL_EXISTING_ROOT_JOURNAL_ID: &str = "7c6d5e4f3a2b1c0d9e8f7a6b5c4d3e2f1"; diff --git a/crates/lib/src/install/config.rs b/crates/lib/src/install/config.rs index 7f123e45f..7d9e5bb72 100644 --- a/crates/lib/src/install/config.rs +++ b/crates/lib/src/install/config.rs @@ -1,6 +1,48 @@ //! # Configuration for `bootc install` //! -//! This module handles the TOML configuration file for `bootc install`. +//! Install-time configuration is read from TOML files embedded in the container image +//! and merged in systemd-style priority order via [`liboverdrop::scan`]. All paths are +//! resolved inside the **running container's** filesystem at install time. +//! +//! **Image authors should place configuration exclusively under `/usr/lib/bootc/install/`.** +//! This is the standard, portable location and is part of the image's read-only content. +//! +//! The following directories are also scanned (in order of increasing priority), but +//! should not be used in practice: +//! +//! - `/usr/lib/bootc/install/*.toml` ← **use this** +//! - `/usr/local/lib/bootc/install/*.toml` — scanned for completeness; avoid +//! - `/etc/bootc/install/*.toml` — scanned for completeness; avoid +//! - `/run/bootc/install/*.toml` — scanned for completeness; avoid +//! +//! Within each directory, files are processed in alphanumeric order. Later entries +//! override earlier ones for scalar fields; list fields such as `kargs` are appended. +//! +//! ## Format +//! +//! Each file contains a single `[install]` table. Example: +//! +//! ```toml +//! [install] +//! root-fs-type = "xfs" +//! kargs = ["console=ttyS0,115200n8"] +//! +//! [install.storage] +//! unified = "enabled-with-copy" +//! ``` +//! +//! ## Unified storage +//! +//! The `[install.storage]` section controls whether unified storage (composefs + ostree) +//! is activated at install time. See [`UnifiedStorage`] for the three values: +//! `disabled` (default), `enabled` (fail if reflinks unavailable), and +//! `enabled-with-copy` (fall back to byte copies). +//! +//! ## Source-imageref limitation +//! +//! When `bootc install` is invoked with `--source-imgref`, configuration is loaded from +//! the **host** filesystem before the target image is available. Image-embedded install +//! config (including `storage.unified`) is therefore not consulted in that scenario. use crate::spec::Bootloader; use anyhow::{Context, Result}; @@ -51,6 +93,39 @@ impl Filesystem { } } +/// Controls unified storage mode (composefs+ostree merged store) at install time. +/// +/// Corresponds to the `[install.storage]` `unified` key in install config TOML files +/// (e.g. `/usr/lib/bootc/install/00-storage.toml`). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "kebab-case")] +pub(crate) enum UnifiedStorage { + /// No unified storage (default). + #[default] + Disabled, + /// Enable unified storage; fail hard if the filesystem does not support reflinks. + Enabled, + /// Enable unified storage; fall back to byte copies if reflinks are unavailable. + EnabledWithCopy, +} + +impl UnifiedStorage { + /// Returns true if unified storage is active (either enabled variant). + pub(crate) fn is_enabled(self) -> bool { + !matches!(self, Self::Disabled) + } + + /// Returns the local fetch option for layer import into the composefs repo: + /// `ZeroCopy` (fail if no reflinks) for `Enabled`, `IfPossible` (copy + /// fallback) for `EnabledWithCopy`. + pub(crate) fn local_fetch_opt(self) -> composefs_ctl::composefs_oci::LocalFetchOpt { + match self { + Self::Enabled => composefs_ctl::composefs_oci::LocalFetchOpt::ZeroCopy, + _ => composefs_ctl::composefs_oci::LocalFetchOpt::IfPossible, + } + } +} + /// The toplevel config entry for installation configs stored /// in bootc/install (e.g. /etc/bootc/install/05-custom.toml) #[derive(Debug, Clone, Serialize, Deserialize, Default)] @@ -81,6 +156,14 @@ pub(crate) struct BasicFilesystems { /// Configuration for ostree repository pub(crate) type OstreeRepoOpts = ostree_ext::repo_options::RepoOptions; +/// Configuration options for storage. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] +pub(crate) struct StorageConfiguration { + /// Unified storage mode. See [`UnifiedStorage`] for the three variants. + pub(crate) unified: Option, +} + /// Configuration options for bootupd, responsible for setting up the bootloader. #[derive(Debug, Clone, Serialize, Deserialize, Default)] #[serde(rename_all = "kebab-case", deny_unknown_fields)] @@ -132,6 +215,8 @@ pub(crate) struct InstallConfiguration { /// Enforce that the containers-storage stack has a non-default /// (i.e. not `insecureAcceptAnything`) container image signature policy. pub(crate) enforce_container_sigpolicy: Option, + /// Storage configuration (unified store, etc.) + pub(crate) storage: Option, } fn merge_basic(s: &mut Option, o: Option, _env: &EnvProperties) { @@ -189,6 +274,13 @@ impl Mergeable for OstreeRepoOpts { } } +impl Mergeable for StorageConfiguration { + /// Apply any values in other, overriding any existing values in `self`. + fn merge(&mut self, other: Self, env: &EnvProperties) { + merge_basic(&mut self.unified, other.unified, env) + } +} + impl Mergeable for Bootupd { /// Apply any values in other, overriding any existing values in `self`. fn merge(&mut self, other: Self, env: &EnvProperties) { @@ -226,6 +318,7 @@ impl Mergeable for InstallConfiguration { other.enforce_container_sigpolicy, env, ); + self.storage.merge(other.storage, env); if let Some(other_kargs) = other.kargs { self.kargs .get_or_insert_with(Default::default) @@ -877,6 +970,50 @@ skip-boot-uuid = false // skip_boot_uuid should be overridden to true assert_eq!(install.bootupd.unwrap().skip_boot_uuid.unwrap(), true); } + + #[test] + fn test_parse_storage_unified() { + for (input, expected) in [ + ("disabled", UnifiedStorage::Disabled), + ("enabled", UnifiedStorage::Enabled), + ("enabled-with-copy", UnifiedStorage::EnabledWithCopy), + ] { + let toml_str = format!("[install.storage]\nunified = \"{input}\"\n"); + let c: InstallConfigurationToplevel = toml::from_str(&toml_str).unwrap(); + assert_eq!( + c.install.unwrap().storage.unwrap().unified.unwrap(), + expected, + "input: {input}" + ); + } + + // Absent key → None + let c: InstallConfigurationToplevel = + toml::from_str("[install]\nroot-fs-type = \"xfs\"\n").unwrap(); + assert!(c.install.unwrap().storage.is_none()); + } + + #[test] + fn test_merge_storage_unified() { + let env = EnvProperties { + sys_arch: "x86_64".to_string(), + }; + let mut install: InstallConfiguration = toml::from_str( + "[storage]\nunified = \"disabled\"\n", + ) + .unwrap(); + let other = InstallConfiguration { + storage: Some(StorageConfiguration { + unified: Some(UnifiedStorage::Enabled), + }), + ..Default::default() + }; + install.merge(other, &env); + assert_eq!( + install.storage.unwrap().unified.unwrap(), + UnifiedStorage::Enabled + ); + } } #[test] diff --git a/crates/lib/src/lsm.rs b/crates/lib/src/lsm.rs index 2c059406d..f9b85f308 100644 --- a/crates/lib/src/lsm.rs +++ b/crates/lib/src/lsm.rs @@ -391,13 +391,13 @@ pub(crate) fn relabel_recurse( /// Uses the `walk` API with `noxdev` and `skip_mountpoints` to avoid crossing /// mount point boundaries /// (e.g. into sysfs, procfs, etc.). -/// The provided `skip` parameter is a device/inode pair that we will ignore -/// (and not traverse into). +/// The provided `skip` parameter is a list of device/inode pairs to ignore +/// (and not traverse into). Pass an empty slice to skip nothing. pub(crate) fn ensure_dir_labeled_recurse( root: &Dir, path: &mut Utf8PathBuf, policy: &ostree::SePolicy, - skip: Option<(libc::dev_t, libc::ino64_t)>, + skip: &[(libc::dev_t, libc::ino64_t)], ) -> Result<()> { use cap_std_ext::dirext::WalkConfiguration; use std::ops::ControlFlow; @@ -432,18 +432,20 @@ pub(crate) fn ensure_dir_labeled_recurse( let metadata = component.entry.metadata()?; // Check if this entry should be skipped - if let Some((skip_dev, skip_ino)) = skip { - if (metadata.dev(), metadata.ino()) == (skip_dev, skip_ino) { - tracing::debug!("Skipping dev={skip_dev} inode={skip_ino}"); - // For directories, Break skips traversal into the directory - // but continues with the next sibling. For non-directories, - // Break would skip all remaining siblings, so use Continue - // to skip only this entry. - if component.file_type.is_dir() { - return Ok(ControlFlow::Break(())); - } else { - return Ok(ControlFlow::Continue(())); - } + let devino = ( + metadata.dev() as libc::dev_t, + metadata.ino() as libc::ino64_t, + ); + if skip.contains(&devino) { + tracing::debug!("Skipping dev={} inode={}", devino.0, devino.1); + // For directories, Break skips traversal into the directory + // but continues with the next sibling. For non-directories, + // Break would skip all remaining siblings, so use Continue + // to skip only this entry. + if component.file_type.is_dir() { + return Ok(ControlFlow::Break(())); + } else { + return Ok(ControlFlow::Continue(())); } } diff --git a/crates/lib/src/podman.rs b/crates/lib/src/podman.rs index 9b287fb40..71d707f3c 100644 --- a/crates/lib/src/podman.rs +++ b/crates/lib/src/podman.rs @@ -19,6 +19,11 @@ pub(crate) struct Inspect { pub(crate) struct ImageListEntry { pub(crate) id: String, pub(crate) names: Option>, + /// Manifest digest as reported by podman (may differ from the composefs + /// OCI manifest digest when layers are recompressed during copy). + /// Kept for diagnostic purposes; use `id` (config sha256) for matching. + #[allow(dead_code)] + pub(crate) digest: Option, } /// Given an image ID, return its manifest digest diff --git a/crates/lib/src/podstorage.rs b/crates/lib/src/podstorage.rs index de6c418ed..2d966c622 100644 --- a/crates/lib/src/podstorage.rs +++ b/crates/lib/src/podstorage.rs @@ -443,6 +443,40 @@ impl CStorage { Ok(true) } + /// Copy a `containers-storage:` image into this bootc-owned storage. + /// + /// If `STORAGE_OPTS=additionalimagestore=` is set in the environment + /// (as injected by bcvk via systemd credentials), uses that path as the + /// explicit `--root` for podman — because podman does not search additional + /// image stores when resolving a push source. Otherwise falls back to the + /// default `/var/lib/containers/storage`. + pub(crate) async fn pull_from_containers_storage(&self, image: &str) -> Result<()> { + // If the image is already in bootc's own containers-storage (e.g. built + // via `bootc image cmd build`), no copy is needed. + if self.exists(image).await.unwrap_or(false) { + tracing::debug!("Image {image} already in bootc storage; skipping copy"); + return Ok(()); + } + + let ais = std::env::var("STORAGE_OPTS") + .ok() + .and_then(|v| { + v.split(',') + .find(|s| s.starts_with("additionalimagestore=")) + .and_then(|s| s.strip_prefix("additionalimagestore=")) + .map(str::to_owned) + }); + + if let Some(src_root) = ais { + tracing::debug!( + "STORAGE_OPTS has additionalimagestore={src_root}; using as source storage root" + ); + self.pull_from_storage_root(&src_root, image).await + } else { + self.pull_from_host_storage(image).await + } + } + /// Copy an image from the default container storage (/var/lib/containers/) /// to this storage. #[context("Pulling from host storage: {image}")] @@ -468,6 +502,57 @@ impl CStorage { Ok(()) } + /// Copy an image from an additional image store into this bootc-owned storage. + /// + /// Used during install when the source image lives in a storage instance + /// advertised via `STORAGE_OPTS=additionalimagestore=` (e.g. the bcvk + /// virtiofs mount) rather than in the default `/var/lib/containers/storage`. + /// + /// Uses `skopeo copy` with `STORAGE_OPTS=additionalimagestore=` on + /// the subprocess. Skopeo respects `STORAGE_OPTS` for source image lookup and + /// does not touch libpod state, which avoids two problems: + /// + /// - `podman --root ` triggers a libpod database mismatch when the + /// foreign store was created by a different podman instance. + /// - `podman pull containers-storage:` with an AIS makes the image *visible* but + /// does not materialize it into the destination root storage. + /// + /// The netavark/skopeo-first issue (containers/skopeo#658) does not apply here + /// because `CStorage::create` always runs `podman images` to initialize the + /// bootc-owned storage before this function is ever called. + #[context("Copying from additional image store at {src_root}: {image}")] + pub(crate) async fn pull_from_storage_root( + &self, + src_root: &str, + image: &str, + ) -> Result<()> { + let mut cmd = Command::new(bootc_utils::skopeo_bin()); + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::null()); + + let temp_runroot = TempDir::new(cap_std::ambient_authority())?; + let mut fds = CmdFds::new(); + bind_storage_roots(&mut cmd, &mut fds, &self.storage_root, &temp_runroot)?; + cmd.take_fds(fds); + + // Set the AIS on the skopeo subprocess so it can find the source image. + let storage_opt = format!("additionalimagestore={src_root}"); + cmd.env("STORAGE_OPTS", &storage_opt); + + let src = format!("containers-storage:{image}"); + let dest = format!( + "containers-storage:[overlay@{STORAGE_ALIAS_DIR}+/proc/self/fd/{STORAGE_RUN_FD}]{image}" + ); + // TODO: wire up structured progress reporting for this copy instead of + // suppressing output; for now --quiet avoids unstructured blob/layer spam. + cmd.args(["copy", "--quiet", "--remove-signatures", &src, &dest]); + + let mut cmd = AsyncCommand::from(cmd); + cmd.run().await?; + temp_runroot.close()?; + Ok(()) + } + /// Pull an image with streaming progress display. /// /// Uses the podman native libpod HTTP API instead of shelling out, diff --git a/crates/lib/src/spec.rs b/crates/lib/src/spec.rs index 4cc3e234e..f0a6b893b 100644 --- a/crates/lib/src/spec.rs +++ b/crates/lib/src/spec.rs @@ -400,6 +400,44 @@ impl Display for FilesystemOverlay { } } +/// The state of unified storage (composefs+ostree shared block storage). +/// +/// Serialized as kebab-case strings (`"disabled"`, `"enabled"`, `"enabled-with-copy"`) +/// to match the `[install.storage] unified` install config values. +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub enum UnifiedStorageState { + /// Unified storage is not enabled; composefs and ostree repos do not share blocks. + #[default] + Disabled, + /// Unified storage is enabled; images are stored via reflinks (zero physical copy). + Enabled, + /// Unified storage is enabled but may use byte copies when reflinks are unavailable. + /// + /// Corresponds to `storage.unified = "enabled-with-copy"` in the install config, + /// or to systems onboarded via `bootc image set-unified` (which always allows + /// copies since it runs at runtime rather than at install time). + EnabledWithCopy, +} + +impl Display for UnifiedStorageState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + UnifiedStorageState::Disabled => write!(f, "disabled"), + UnifiedStorageState::Enabled => write!(f, "enabled"), + UnifiedStorageState::EnabledWithCopy => write!(f, "enabled-with-copy"), + } + } +} + +/// Storage subsystem status. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct StorageStatus { + /// The state of unified block-sharing storage. + pub unified: UnifiedStorageState, +} + /// The status of the host system #[derive(Debug, Clone, Serialize, Default, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "camelCase")] @@ -424,6 +462,10 @@ pub struct HostStatus { /// The state of the overlay mounted on /usr pub usr_overlay: Option, + + /// Storage subsystem state. + #[serde(skip_serializing_if = "Option::is_none")] + pub storage: Option, } pub(crate) struct DeploymentEntry<'a> { diff --git a/crates/lib/src/status.rs b/crates/lib/src/status.rs index c6c3e8232..6d9181b7c 100644 --- a/crates/lib/src/status.rs +++ b/crates/lib/src/status.rs @@ -428,6 +428,15 @@ pub(crate) fn get_status( .map(|d| d.unlocked()) .and_then(crate::spec::deployment_unlocked_state_to_usr_overlay); + // Populate storage status from bootc repo metadata. + // The physical root for a booted ostree system is at /sysroot. + let storage_status = crate::utils::sysroot_dir(sysroot) + .and_then(|sysroot_dir| sysroot_dir.open_dir(crate::store::SYSROOT).map_err(Into::into)) + .and_then(|physical_root| crate::store::BootcRepoMeta::read(&physical_root)) + .ok() + .flatten() + .map(|meta| crate::spec::StorageStatus { unified: meta.unified }); + let mut host = Host::new(spec); host.status = HostStatus { staged, @@ -437,6 +446,7 @@ pub(crate) fn get_status( rollback_queued, ty, usr_overlay, + storage: storage_status, }; Ok((deployments, host)) } @@ -726,6 +736,9 @@ fn human_render_slot( // Show /usr overlay status write_usr_overlay(&mut out, slot, host_status, prefix_len)?; + // Show storage status + write_storage(&mut out, slot, host_status, prefix_len)?; + if verbose { // Show additional information in verbose mode similar to rpm-ostree if let Some(ostree) = &entry.ostree { @@ -784,6 +797,27 @@ fn write_usr_overlay( Ok(()) } +/// Helper function to render storage status +fn write_storage( + mut out: impl Write, + slot: Option, + host_status: &crate::spec::HostStatus, + prefix_len: usize, +) -> Result<()> { + // Only show storage info for the booted slot + if matches!(slot, Some(Slot::Booted)) { + if let Some(ref storage) = host_status.storage { + use crate::spec::UnifiedStorageState; + // Only show non-default (non-disabled) state + if storage.unified != UnifiedStorageState::Disabled { + write_row_name(&mut out, "Storage", prefix_len)?; + writeln!(out, "unified ({})", storage.unified)?; + } + } + } + Ok(()) +} + /// Output a rendering of a non-container boot entry. fn human_render_slot_ostree( mut out: impl Write, diff --git a/crates/lib/src/store/mod.rs b/crates/lib/src/store/mod.rs index 940d59100..0cddbd8cd 100644 --- a/crates/lib/src/store/mod.rs +++ b/crates/lib/src/store/mod.rs @@ -36,7 +36,7 @@ use ostree_ext::container_utils::ostree_booted; use ostree_ext::prelude::FileExt; use ostree_ext::sysroot::SysrootLock; use ostree_ext::{gio, ostree}; -use rustix::fs::Mode; +use rustix::fs::{Mode, OFlags}; use composefs::fsverity::Sha512HashValue; use composefs_ctl::composefs; @@ -62,6 +62,67 @@ pub const COMPOSEFS: &str = "composefs"; /// to avoid leaking information. pub(crate) const COMPOSEFS_MODE: Mode = Mode::from_raw_mode(0o700); +/// Path to bootc-specific metadata stored alongside the composefs `meta.json`. +/// +/// This file records bootc-level configuration for the composefs repository, +/// such as whether unified base-image storage is enabled. Its absence means +/// the system is not in unified-storage mode. The path is relative to the +/// physical system root. +pub(crate) const BOOTC_REPO_META: &str = "composefs/bootc.json"; + +/// bootc-specific metadata stored in `composefs/bootc.json`. +#[derive(Debug, Default, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "kebab-case")] +pub(crate) struct BootcRepoMeta { + /// Schema version; currently must be 1. + #[serde(default)] + pub version: u32, + + /// Unified storage state for this system. Matches the `storage.unified` + /// field in `bootc status --json` and the `[install.storage] unified` + /// install config. + /// + /// `disabled` (the default) means unified storage is not active. + /// `enabled` means images were imported with FICLONE (reflinks required). + /// `enabled-with-copy` means unified storage is active but byte-copy fallback + /// is allowed (e.g. `storage.unified = "enabled-with-copy"` or + /// onboarded via `bootc image set-unified`). + #[serde(default)] + pub unified: crate::spec::UnifiedStorageState, +} + +impl BootcRepoMeta { + /// Read `composefs/bootc.json` from the physical root, if it exists. + /// Returns `Ok(None)` when the file is absent (i.e. unified storage + /// has never been enabled on this system). + pub(crate) fn read(physical_root: &Dir) -> Result> { + match physical_root.open(BOOTC_REPO_META) { + Ok(f) => { + let meta: Self = serde_json::from_reader(std::io::BufReader::new(f)) + .with_context(|| format!("Parsing {BOOTC_REPO_META}"))?; + Ok(Some(meta)) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(e) => Err(e).with_context(|| format!("Opening {BOOTC_REPO_META}")), + } + } + + /// Write `composefs/bootc.json` to the physical root, atomically. + /// The composefs directory must already exist. + pub(crate) fn write(&self, physical_root: &Dir) -> Result<()> { + let json = serde_json::to_string_pretty(self).context("Serializing BootcRepoMeta")?; + // Write atomically via a temp file. + let tmp = format!("{BOOTC_REPO_META}.tmp"); + physical_root + .write(&tmp, json.as_bytes()) + .with_context(|| format!("Writing {tmp}"))?; + physical_root + .rename(&tmp, physical_root, BOOTC_REPO_META) + .with_context(|| format!("Renaming {tmp} -> {BOOTC_REPO_META}"))?; + Ok(()) + } +} + /// Ensure the composefs directory exists in the given physical root /// with the correct permissions (mode 0700). pub(crate) fn ensure_composefs_dir(physical_root: &Dir) -> Result<()> { @@ -276,6 +337,7 @@ impl BootedStorage { ostree: Default::default(), composefs: OnceCell::from(composefs.clone()), imgstore: Default::default(), + reflinks_supported: Default::default(), }; // prepend_custom_prefix is idempotent: it checks has_prefix on each @@ -312,6 +374,7 @@ impl BootedStorage { ostree: OnceCell::from(sysroot), composefs: Default::default(), imgstore: Default::default(), + reflinks_supported: Default::default(), }; Some(Self { storage }) @@ -373,6 +436,16 @@ pub(crate) struct Storage { composefs: OnceCell>, /// The containers-image storage used for LBIs imgstore: OnceCell, + + /// Cached result of the FICLONE reflink probe on `physical_root`. + /// + /// `true` → the filesystem hosting `physical_root` supports reflinks; + /// determined by successfully cloning between two anonymous + /// tmpfiles created inside `physical_root` itself. + /// `false` → FICLONE returned `EOPNOTSUPP`/`EXDEV` (e.g. ext4). + /// + /// Populated lazily on first call to [`Storage::reflinks_supported`]. + reflinks_supported: OnceCell, } /// Cached image status data used for optimization. @@ -428,6 +501,7 @@ impl Storage { ostree: ostree_cell, composefs: Default::default(), imgstore: Default::default(), + reflinks_supported: Default::default(), }) } @@ -561,6 +635,62 @@ impl Storage { Ok(r) } + /// Probe whether the filesystem hosting `physical_root` supports reflinks + /// (FICLONE / copy-on-write extent sharing), caching the result. + /// + /// The probe creates two anonymous O_TMPFILE inodes inside `physical_root` + /// and attempts `ioctl(FICLONE)` between them. Because both files live on + /// the same filesystem as the composefs and ostree repositories, a + /// successful probe guarantees that reflinks will work between those repos. + /// + /// Returns `true` on XFS / btrfs, `false` on ext4 and anything else that + /// returns `EOPNOTSUPP` / `EXDEV`. + pub(crate) fn reflinks_supported(&self) -> Result { + if let Some(&cached) = self.reflinks_supported.get() { + return Ok(cached); + } + + // Open the physical root as a plain rustix fd so we can use openat. + let dir_fd = rustix::fs::open( + self.physical_root_path.as_str(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .context("Opening physical root for reflink probe")?; + + // Source: an anonymous inode we write one byte into. + let src = rustix::fs::openat( + &dir_fd, + c".", + OFlags::RDWR | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o600), + ) + .context("Creating source tmpfile for reflink probe")?; + rustix::io::write(&src, b"x").context("Writing probe byte")?; + + // Destination: another anonymous inode to clone into. + let dst = rustix::fs::openat( + &dir_fd, + c".", + OFlags::RDWR | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o600), + ) + .context("Creating dest tmpfile for reflink probe")?; + + let supported = match rustix::fs::ioctl_ficlone(&dst, &src) { + Ok(()) => true, + Err(rustix::io::Errno::OPNOTSUPP) | Err(rustix::io::Errno::XDEV) => false, + Err(e) => { + // Any other error (e.g. EPERM) is unexpected; log and assume not supported. + tracing::warn!("Unexpected error probing reflink support: {e}; assuming unsupported"); + false + } + }; + + tracing::debug!("Reflink probe on {}: {}", self.physical_root_path, supported); + Ok(*self.reflinks_supported.get_or_init(|| supported)) + } + /// Update the mtime on the storage root directory. /// /// This touches `ostree/bootc` (or its symlink target on composefs diff --git a/crates/ostree-ext/src/container/composefs_import.rs b/crates/ostree-ext/src/container/composefs_import.rs new file mode 100644 index 000000000..70ac58350 --- /dev/null +++ b/crates/ostree-ext/src/container/composefs_import.rs @@ -0,0 +1,970 @@ +//! Synthesize an ostree commit directly from a composefs OCI repository. +//! +//! This is the inverse of the existing ostree→composefs flow in libostree. +//! Rather than importing OCI layers through the tar pipeline, we walk the +//! composefs `FileSystem` tree and write each entry as an ostree object. +//! +//! For external regular files we bypass the ostree C library's `write_content` +//! API and instead write content objects directly to disk. This lets us +//! attempt `FICLONE` (reflink) from the composefs object fd, falling back to +//! `copy_file_range` / read-write. The ostree SHA256 content checksum is +//! still computed via the C library's `checksum_file_from_input` (a pure, +//! repo-independent function) so the serialisation format is never +//! reimplemented. +//! +//! Metadata objects (dirmeta, dirtree, commit) and small inline files continue +//! to use the ostree C API — they are tiny and benefit from the existing +//! validation. +//! +//! # Why reflink, not hardlink +//! +//! A composefs object is content-addressed by its SHA-512 fsverity digest. +//! Multiple filesystem paths with identical content share a single composefs +//! object (inode). In ostree bare mode, however, every content object carries +//! its own per-inode metadata: uid, gid, mode, and *all* xattrs including +//! `security.selinux`. The same composefs object can be referenced by two +//! ostree objects that need different SELinux labels — for example +//! `/etc/sudo.conf` (label `etc_t`) and +//! `/usr/share/doc/sudo/examples/sudo.conf` (label `usr_t`). Because a +//! single inode can hold only one `security.selinux` value, hardlinking both +//! ostree objects to the shared composefs inode is impossible: the second +//! `setxattr` would overwrite the first, causing `ostree fsck` to report a +//! checksum mismatch. +//! +//! Reflinks (FICLONE) solve this: each ostree object gets its own inode and +//! therefore its own metadata, while the underlying disk extents are shared +//! with the composefs object — zero copy, same space on the wire. +//! +//! # Filesystem requirements +//! +//! FICLONE requires a reflink-capable filesystem such as XFS or btrfs. On +//! these filesystems the ostree and composefs repositories share disk blocks +//! so installing the OS adds no extra space for the ostree content objects. +//! On ext4 (and other filesystems without reflink support) FICLONE fails with +//! `EOPNOTSUPP` and the code falls back to a byte copy; installation succeeds +//! but no block sharing occurs between the two repositories. +//! +//! # SELinux labels and the NUL-terminator fix +//! +//! `selabel()` from composefs-rs applies SELinux labels in-memory to the +//! filesystem tree before ostree synthesis begins. On SELinux-disabled hosts +//! (such as a container used for `bootc install`) the `fsetxattr` call for +//! `security.selinux` is a no-op; labels are applied by the kernel at first +//! boot during auto-relabeling. The ostree checksums are computed with the +//! correct label values (because `checksum_file_from_input` sees them from the +//! in-memory tree), so `ostree fsck` passes after boot. +//! +//! One subtlety: composefs-rs `selabel()` stores SELinux values *without* a +//! trailing NUL, but the kernel stores them *with* one. `xattrs_to_variant` +//! appends the NUL when computing the ostree checksum so that it matches what +//! `ostree fsck` reads back from the live filesystem. + +use std::collections::BTreeMap; +use std::ffi::OsStr; +use std::os::fd::{AsFd as _, AsRawFd as _, BorrowedFd}; +use std::os::unix::ffi::OsStrExt as _; + +use anyhow::{Context as _, Result}; + +use composefs_ctl::composefs::fsverity::Sha512HashValue; +use composefs_ctl::composefs::generic_tree::{Inode, Stat}; +use composefs_ctl::composefs::repository::Repository; +use composefs_ctl::composefs::tree::{Directory, Leaf, LeafContent, RegularFile}; +use composefs_ctl::composefs_boot::selabel::selabel; +use composefs_ctl::composefs_oci; + +use crate::container::store::{ + META_COMPOSEFS_SYNTHESIZED, META_CONFIG, META_MANIFEST, META_MANIFEST_DIGEST, +}; +use crate::prelude::{Cast as _, ToVariant as _}; +use crate::{gio, glib}; + +type ComposefsRepo = Repository; + +/// Tracks per-import statistics and FICLONE capability for the composefs→ostree +/// object copy loop. +/// +/// # Why reflink, not hardlink +/// +/// A composefs object is a content-addressed file identified by its SHA-512 +/// fsverity digest. Multiple filesystem paths with identical content share the +/// same composefs object (inode). In ostree's bare repo mode, however, each +/// content object carries its own metadata — uid, gid, mode, and **all xattrs +/// including `security.selinux`** — directly on the inode. +/// +/// The same composefs object can be referenced by ostree objects that need +/// different metadata. For example `/etc/sudo.conf` and +/// `/usr/share/doc/sudo/examples/sudo.conf` have identical bytes but different +/// SELinux labels (`etc_t` vs `usr_t`), producing different ostree content +/// checksums. A single inode can carry only one `security.selinux` value, so +/// hardlinking both ostree objects to the shared composefs inode is impossible: +/// whichever metadata is written second silently overwrites the first, causing +/// `ostree fsck` to report a checksum mismatch. +/// +/// Reflinks (FICLONE) solve this cleanly: each ostree object gets its own +/// inode (its own metadata) while the underlying disk extents are shared with +/// the composefs object — zero copy, same space efficiency. If the filesystem +/// does not support reflinks (e.g. ext4) we fall back to a byte copy; unified +/// storage is not possible in that configuration. +struct DirectImportContext { + /// `true` after FICLONE has failed on this device pair. + ficlone_failed: bool, + /// Count of files reflinked (FICLONE succeeded — blocks shared with composefs object). + reflinked: u64, + /// Count of files written via byte copy fallback (filesystem lacks reflink support). + copied: u64, + /// Count of objects that already existed in the repo (skipped). + existing: u64, + /// When `true`, any FICLONE failure is returned as a hard error instead of + /// silently falling back to byte copy. Set by the caller when it has already + /// confirmed that reflinks are supported on the storage (e.g. via a probe). + reflinks_required: bool, +} + +/// Convert composefs xattrs (`BTreeMap`) to the ostree GVariant format (`a(ayay)`). +/// +/// The ostree on-disk format stores xattr names as NUL-terminated C strings inside +/// `ay` (byte array) GVariant values — matching the C library's use of +/// `g_variant_new_bytestring(name)`. The validation function in libostree reads +/// names back with the `"(^&ay@ay)"` format specifier, which returns a raw pointer +/// into the GVariant buffer, and then calls `strcmp` on it — so the trailing NUL +/// *must* be present. Without it the first byte of the *value* would be treated as +/// part of the name, and if that byte happens to be `0x00` the validator throws +/// "Invalid xattr name (empty or missing NUL)". +fn xattrs_to_variant(xattrs: &BTreeMap, Box<[u8]>>) -> Option { + if xattrs.is_empty() { + return None; + } + let children = xattrs.iter().map(|(k, v)| { + // Append NUL so that ostree's C validator can read the name as a C string. + let mut name_bytes = k.as_bytes().to_vec(); + name_bytes.push(b'\0'); + // SELinux stores label values as NUL-terminated strings on the filesystem. + // The composefs selabel() function strips the NUL, but ostree fsck reads + // xattrs back from disk (where the kernel always stores them with NUL) when + // verifying checksums. Ensure our checksum matches by adding the NUL here + // for security.selinux values that are missing it. + let value: &[u8] = v.as_ref(); + let value_owned; + let value = if k.as_bytes() == b"security.selinux" + && !value.last().copied().map_or(true, |b| b == 0) + { + value_owned = [value, &[0u8]].concat(); + value_owned.as_slice() + } else { + value + }; + glib::Variant::tuple_from_iter([name_bytes.as_slice().to_variant(), value.to_variant()]) + }); + Some(glib::Variant::array_from_iter::<(&[u8], &[u8])>(children)) +} + +/// Build an ostree `DirMeta` GVariant from a composefs `Stat`. +fn create_dirmeta(stat: &Stat) -> glib::Variant { + let finfo = gio::FileInfo::new(); + finfo.set_attribute_uint32("unix::uid", stat.st_uid); + finfo.set_attribute_uint32("unix::gid", stat.st_gid); + // ostree's create_directory_metadata requires the full mode word including + // the S_IFDIR type bits. The composefs Stat only stores the permission + // bits, so we must OR in S_IFDIR ourselves. + let mode = libc::S_IFDIR | stat.st_mode; + finfo.set_attribute_uint32("unix::mode", mode); + let xattrs = xattrs_to_variant(&stat.xattrs); + crate::ostree::create_directory_metadata(&finfo, xattrs.as_ref()) +} + +/// Top-level paths from the OCI image that are API/virtual filesystems at runtime. +/// +/// These paths (`/proc`, `/sys`, etc.) must NOT have their children written into +/// the ostree commit — they are mounted as kernel-provided virtual filesystems at +/// boot time and any content baked into the image is irrelevant (and in the case +/// of device nodes, actively harmful). The directory entries themselves are +/// written as empty directories so that the mount points exist. +/// +/// This mirrors the `EXCLUDED_TOPLEVEL_PATHS` list and the filtering logic in +/// `crate::tar::write::normalize_validate_path()`. +const EXCLUDED_TOPLEVEL_PATHS: &[&str] = &["run", "tmp", "proc", "sys", "dev"]; + +/// Walk the composefs root directory, applying ostree path normalization: +/// +/// - `/etc` → `usr/etc` (ostree's deployment mechanism expects config here) +/// - Children of API/virtual-filesystem mounts (`/proc`, `/sys`, `/dev`, `/run`, +/// `/tmp`) are **omitted** — the directory entries are written as empty mount +/// points but their content is never part of the commit. +/// +/// This matches the filtering that the tar import pipeline applies via +/// `normalize_validate_path()` in `tar/write.rs`. +fn write_dir_to_mtree_remap_etc( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + dir: &Directory, + leaves: &[Leaf], + mtree: &crate::ostree::MutableTree, + ctx: &mut DirectImportContext, +) -> Result<()> { + // Write the root dirmeta first + let dirmeta = create_dirmeta(&dir.stat); + let meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &dirmeta, + gio::Cancellable::NONE, + ) + .context("Writing root dirmeta")?; + mtree.set_metadata_checksum(&meta_csum.to_hex()); + + // Collect the `etc` directory entry (if any) for deferred processing + let mut etc_dir: Option<&Directory> = None; + + for (name, inode) in dir.sorted_entries() { + let name = name + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 entry name: {:?}", name))?; + + // Intercept `etc` at the root and defer it — we'll write it + // as `usr/etc` after processing everything else. + if name == "etc" { + match inode { + Inode::Directory(child) => { + etc_dir = Some(child); + continue; + } + _ => { + // Unusual: /etc is not a directory (e.g. symlink). + // Write it as-is — the deployment code will handle it + // or fail later with a more specific error. + } + } + } + + // API/virtual-filesystem mount points: include the empty directory itself + // (so the mount point exists at runtime) but skip all children. This + // mirrors the tar importer's EXCLUDED_TOPLEVEL_PATHS handling. + if EXCLUDED_TOPLEVEL_PATHS.contains(&name) { + if let Inode::Directory(child) = inode { + let child_mtree = mtree.ensure_dir(name)?; + // Write only the dirmeta (permissions/xattrs) — no children. + let child_dirmeta = create_dirmeta(&child.stat); + let child_meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &child_dirmeta, + gio::Cancellable::NONE, + ) + .with_context(|| format!("Writing dirmeta for excluded path {name}"))?; + child_mtree.set_metadata_checksum(&child_meta_csum.to_hex()); + tracing::debug!("Composefs import: skipping children of excluded path /{name}"); + continue; + } + // Non-directory at an excluded path (e.g. a symlink for /tmp)? + // Skip it entirely. + tracing::debug!("Composefs import: skipping non-directory excluded path /{name}"); + continue; + } + + match inode { + Inode::Directory(child) => { + let child_mtree = mtree.ensure_dir(name)?; + write_dir_to_mtree(orepo, crepo, child, leaves, &child_mtree, ctx)?; + } + Inode::Leaf(leaf_id, _) => { + let leaf = &leaves[leaf_id.0]; + let csum = write_leaf(orepo, crepo, &leaf.stat, &leaf.content, ctx)?; + mtree + .replace_file(name, &csum) + .with_context(|| format!("Inserting {name} into mtree"))?; + } + } + } + + // Now write the deferred `/etc` directory as `usr/etc`. + if let Some(etc) = etc_dir { + let usr_mtree = mtree.ensure_dir("usr")?; + let usr_etc_mtree = usr_mtree.ensure_dir("etc")?; + write_dir_to_mtree(orepo, crepo, etc, leaves, &usr_etc_mtree, ctx) + .context("Writing /etc as usr/etc")?; + } + + Ok(()) +} + +/// Recursively walk a composefs directory, writing each entry into `mtree`. +fn write_dir_to_mtree( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + dir: &Directory, + leaves: &[Leaf], + mtree: &crate::ostree::MutableTree, + ctx: &mut DirectImportContext, +) -> Result<()> { + let dirmeta = create_dirmeta(&dir.stat); + let meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &dirmeta, + gio::Cancellable::NONE, + ) + .context("Writing dirmeta")?; + mtree.set_metadata_checksum(&meta_csum.to_hex()); + + for (name, inode) in dir.sorted_entries() { + let name = name + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 entry name: {:?}", name))?; + match inode { + Inode::Directory(child) => { + let child_mtree = mtree.ensure_dir(name)?; + write_dir_to_mtree(orepo, crepo, child, leaves, &child_mtree, ctx)?; + } + Inode::Leaf(leaf_id, _) => { + let leaf = &leaves[leaf_id.0]; + let csum = write_leaf(orepo, crepo, &leaf.stat, &leaf.content, ctx)?; + mtree + .replace_file(name, &csum) + .with_context(|| format!("Inserting {name} into mtree"))?; + } + } + } + Ok(()) +} + +/// Write a single composefs leaf (regular file or symlink) to the ostree repo. +/// Returns the hex content checksum. +fn write_leaf( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + content: &LeafContent, + ctx: &mut DirectImportContext, +) -> Result { + let xattrs = xattrs_to_variant(&stat.xattrs); + match content { + LeafContent::Symlink(target) => { + let target = target + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 symlink target"))?; + let csum = orepo + .write_symlink( + None, + stat.st_uid, + stat.st_gid, + xattrs.as_ref(), + target, + gio::Cancellable::NONE, + ) + .context("Writing symlink")?; + Ok(csum.to_string()) + } + LeafContent::Regular(file) => { + write_regular_file(orepo, crepo, stat, file, xattrs.as_ref(), ctx) + } + other => anyhow::bail!( + "Unsupported composefs leaf (device/fifo/socket): {:?}", + std::mem::discriminant(other) + ), + } +} + +/// Write a regular file into the ostree repo. +/// +/// Inline files use the ostree C API directly. External files bypass the C +/// `write_content` path and instead write content objects directly to disk, +/// attempting FICLONE (reflink) first and falling back to a byte copy. +/// +/// The ostree content checksum is computed via the C library's +/// `checksum_file_from_input` to guarantee format compatibility. +fn write_regular_file( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + file: &RegularFile, + xattrs: Option<&glib::Variant>, + ctx: &mut DirectImportContext, +) -> Result { + // ostree requires the full mode word including S_IFREG + let mode = libc::S_IFREG | stat.st_mode; + + match file { + RegularFile::Inline(data) => { + let csum = orepo + .write_regfile_inline( + None, + stat.st_uid, + stat.st_gid, + mode, + xattrs, + data, + gio::Cancellable::NONE, + ) + .context("Writing inline file")?; + Ok(csum.to_string()) + } + RegularFile::External(object_id, size) => { + write_external_file_direct(orepo, crepo, stat, object_id, *size, xattrs, mode, ctx) + } + } +} + +/// Compute the ostree content checksum for a regular file using the C API. +/// +/// This calls `ostree_checksum_file_from_input()` which is a pure, +/// repo-independent function. It constructs the standard file header +/// GVariant (uid/gid/mode/xattrs) and hashes `header || content`. +fn compute_content_checksum( + stat: &Stat, + xattrs: Option<&glib::Variant>, + fd: BorrowedFd<'_>, + size: u64, + mode: u32, +) -> Result { + let finfo = gio::FileInfo::new(); + finfo.set_attribute_uint32("unix::uid", stat.st_uid); + finfo.set_attribute_uint32("unix::gid", stat.st_gid); + finfo.set_attribute_uint32("unix::mode", mode); + finfo.set_size(size as i64); + finfo.set_file_type(gio::FileType::Regular); + + // Dup the fd so we can hand an owned File to ReadInputStream without + // touching the caller's fd or using unsafe. + let file = std::fs::File::from(fd.try_clone_to_owned().context("Dup fd for checksum")?); + let istream = gio::ReadInputStream::new(file); + let csum = crate::ostree::checksum_file_from_input( + &finfo, + xattrs, + Some(&istream), + crate::ostree::ObjectType::File, + gio::Cancellable::NONE, + ) + .map_err(|e| anyhow::anyhow!("Computing content checksum: {e}"))?; + + Ok(csum.to_string()) +} + +/// Build the `user.ostreemeta` GVariant `(uuu@a(ayay))` for bare-user mode. +/// +/// This matches the C function `create_file_metadata()` in ostree-repo-commit.c. +fn build_bareuser_metadata( + uid: u32, + gid: u32, + mode: u32, + xattrs: Option<&glib::Variant>, +) -> glib::Variant { + let empty_xattrs; + let xattrs = match xattrs { + Some(v) => v.clone(), + None => { + empty_xattrs = glib::Variant::array_from_iter::<(&[u8], &[u8])>(std::iter::empty()); + empty_xattrs + } + }; + // ostree stores these as big-endian u32 values + glib::Variant::tuple_from_iter([ + uid.to_be().to_variant(), + gid.to_be().to_variant(), + mode.to_be().to_variant(), + xattrs, + ]) +} + +/// Apply file metadata to an fd according to the ostree repo mode. +/// +/// For `Bare`: sets real uid/gid, mode, and xattrs via syscalls. +/// For `BareUser`: stores metadata in a `user.ostreemeta` xattr and +/// applies a masked mode suitable for non-root operation. +fn apply_file_metadata( + fd: BorrowedFd<'_>, + stat: &Stat, + mode: u32, + xattrs: Option<&glib::Variant>, + repo_mode: crate::ostree::RepoMode, +) -> Result<()> { + use rustix::fs::{Mode, Timestamps, XattrFlags, fchmod, fchown, fsetxattr, futimens}; + + match repo_mode { + crate::ostree::RepoMode::Bare => { + fchown( + fd, + Some(rustix::process::Uid::from_raw(stat.st_uid)), + Some(rustix::process::Gid::from_raw(stat.st_gid)), + ) + .context("fchown")?; + fchmod(fd, Mode::from_raw_mode(mode)).context("fchmod")?; + // Set real xattrs on the file. For security.selinux the composefs + // selabel() function stores values without the NUL terminator that + // the kernel expects; add it here so the on-disk value matches what + // xattrs_to_variant() (and thus the ostree checksum) sees. + for (name, value) in &stat.xattrs { + let value: &[u8] = value.as_ref(); + let value_with_nul; + let value = if name.as_bytes() == b"security.selinux" + && !value.last().copied().map_or(true, |b| b == 0) + { + value_with_nul = [value, &[0u8]].concat(); + value_with_nul.as_slice() + } else { + value + }; + fsetxattr(fd, name.as_bytes(), value, XattrFlags::empty()) + .with_context(|| format!("fsetxattr({:?})", name))?; + } + } + crate::ostree::RepoMode::BareUser => { + // Write the user.ostreemeta xattr + let meta = build_bareuser_metadata(stat.st_uid, stat.st_gid, mode, xattrs); + let meta_bytes = meta.data_as_bytes(); + fsetxattr( + fd, + c"user.ostreemeta", + meta_bytes.as_ref(), + XattrFlags::empty(), + ) + .context("fsetxattr(user.ostreemeta)")?; + // Mask mode for non-root safety (matches libostree) + let content_mode = (mode & (libc::S_IFREG | 0o775)) | libc::S_IRUSR; + fchmod(fd, Mode::from_raw_mode(content_mode)).context("fchmod (bare-user)")?; + } + other => anyhow::bail!("Unsupported ostree repo mode for direct write: {other:?}"), + } + + // Set mtime to OSTREE_TIMESTAMP (0), matching libostree behavior. + // UTIME_OMIT for atime means "leave atime unchanged". + futimens( + fd, + &Timestamps { + last_access: rustix::fs::Timespec { + tv_sec: 0, + tv_nsec: rustix::fs::UTIME_OMIT, + }, + last_modification: rustix::fs::Timespec { + tv_sec: 0, + tv_nsec: 0, + }, + }, + ) + .context("futimens")?; + + Ok(()) +} + +/// Ensure the `objects/XX/` directory exists under the repo. +fn ensure_object_dir(repo_dfd: BorrowedFd<'_>, prefix: &str) -> Result<()> { + use rustix::fs::{Mode, mkdirat}; + + let objects_dir = format!("objects/{prefix}"); + match mkdirat(repo_dfd, objects_dir.as_str(), Mode::from_raw_mode(0o777)) { + Ok(()) => Ok(()), + Err(rustix::io::Errno::EXIST) => Ok(()), + Err(e) => Err(e).context(format!("Creating {objects_dir}"))?, + } +} + +/// Write a composefs external regular file directly into the ostree repo. +/// +/// # Sharing strategy: FICLONE (reflink) → byte copy +/// +/// Each ostree content object is written as a fresh inode (via `O_TMPFILE`) +/// and populated from the composefs object using `FICLONE` (reflink) when the +/// filesystem supports it, falling back to `copy_file_range` / `std::io::copy`. +/// +/// **Why not hardlink?** A composefs object is identified by its SHA-512 +/// fsverity digest of raw content. The same object can be referenced by +/// multiple ostree content objects that carry different metadata — in +/// particular different `security.selinux` xattrs assigned by the SELinux +/// policy relabeling step. Ostree bare mode stores metadata (uid/gid/mode/ +/// xattrs) on the inode, so a single shared inode cannot satisfy two different +/// metadata requirements. FICLONE shares the underlying disk extents while +/// keeping inodes independent, giving each ostree object its own metadata. +/// +/// On filesystems without reflink support (ext4, etc.) FICLONE fails with +/// `EOPNOTSUPP` and we fall back to a full byte copy. Unified storage +/// (space-sharing between the composefs and ostree repos) requires a +/// reflink-capable filesystem (XFS, btrfs, …). +/// +/// Returns the hex SHA256 content checksum. +#[allow(clippy::too_many_arguments)] +fn write_external_file_direct( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + object_id: &Sha512HashValue, + size: u64, + xattrs: Option<&glib::Variant>, + mode: u32, + ctx: &mut DirectImportContext, +) -> Result { + use rustix::fs::{AtFlags, Mode, OFlags, linkat, openat}; + use std::io::{Seek, SeekFrom}; + + let cfs_fd = crepo + .open_object(object_id) + .context("Opening composefs object")?; + + // Compute the ostree content checksum using the C API. + let checksum = compute_content_checksum(stat, xattrs, cfs_fd.as_fd(), size, mode) + .context("Computing content checksum")?; + + // Check if the object already exists — skip if so. + if orepo + .has_object( + crate::ostree::ObjectType::File, + &checksum, + gio::Cancellable::NONE, + ) + .context("Checking for existing object")? + { + ctx.existing += 1; + return Ok(checksum); + } + + let repo_dfd = orepo.dfd_borrow(); + let repo_mode = orepo.mode(); + + // Ensure objects/XX/ directory exists. + let (prefix, _rest) = checksum.split_at(2); + ensure_object_dir(repo_dfd, prefix)?; + + let obj_path = format!("objects/{prefix}/{}.file", &checksum[2..]); + + // Create an O_TMPFILE in the repo's tmp/ directory. This gives us an + // anonymous inode we can populate before atomically linking it into place, + // avoiding partial writes visible to concurrent readers. + let tmp_fd = openat( + repo_dfd, + c"tmp", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .context("Creating tmpfile in ostree repo/tmp")?; + + // Seek the source fd back to the start (compute_content_checksum consumed it). + let mut src_file = std::fs::File::from(cfs_fd); + src_file + .seek(SeekFrom::Start(0)) + .context("Seeking composefs fd to start")?; + + // --- Primary path: FICLONE (reflink) --- + // + // Shares the underlying disk extents with the composefs object while giving + // the ostree object its own inode (and thus its own metadata). On XFS this + // is a near-zero-cost operation. After the first failure we stop trying. + let mut reflinked = false; + if !ctx.ficlone_failed { + match rustix::fs::ioctl_ficlone(&tmp_fd, &src_file) { + Ok(()) => { + reflinked = true; + } + Err(e @ (rustix::io::Errno::OPNOTSUPP | rustix::io::Errno::XDEV)) => { + if ctx.reflinks_required { + return Err(anyhow::anyhow!( + "FICLONE failed ({e}) but reflinks were confirmed supported \ + by the storage probe — cannot safely continue import" + )); + } + tracing::debug!( + "FICLONE not supported on this filesystem; \ + falling back to copy for all remaining objects" + ); + ctx.ficlone_failed = true; + } + Err(e) => { + if ctx.reflinks_required { + return Err(anyhow::anyhow!( + "FICLONE failed unexpectedly ({e}); reflinks are required" + )); + } + tracing::debug!("FICLONE failed unexpectedly ({e}); falling back to copy"); + ctx.ficlone_failed = true; + } + } + } + + if reflinked { + ctx.reflinked += 1; + } else { + // --- Fallback: byte copy via copy_file_range --- + let mut dst_file = std::fs::File::from(tmp_fd.try_clone().context("Cloning tmpfile fd")?); + let n = std::io::copy(&mut src_file, &mut dst_file).context("Copying file content")?; + anyhow::ensure!( + n == size, + "Size mismatch: expected {size} bytes but copied {n}" + ); + ctx.copied += 1; + } + + // Apply metadata (uid/gid/mode/xattrs/mtime) to the new inode. + apply_file_metadata(tmp_fd.as_fd(), stat, mode, xattrs, repo_mode)?; + + // Atomically link the tmpfile into place at objects/XX/rest.file. + let proc_path = format!("/proc/self/fd/{}", tmp_fd.as_raw_fd()); + match linkat( + rustix::fs::CWD, + proc_path.as_str(), + repo_dfd, + obj_path.as_str(), + AtFlags::SYMLINK_FOLLOW, + ) { + Ok(()) => {} + Err(rustix::io::Errno::EXIST) => { + // A concurrent writer (or an earlier call) already wrote this object. + } + Err(e) => { + return Err(e).with_context(|| format!("Linking tmpfile to {obj_path}")); + } + } + + Ok(checksum) +} + +/// Synthesize an ostree commit from a composefs repository that has already +/// pulled an OCI image. +/// +/// This is the inverse of the ostree→composefs path in libostree: instead of +/// reading an ostree commit to build an EROFS image, we walk the composefs +/// `FileSystem` tree and write each entry as an ostree object. +/// +/// The resulting commit carries the same OCI metadata as a commit produced by +/// the standard `ostree-container` import pipeline: +/// - `ostree.container.image-config` +/// - `ostree.manifest` +/// - `ostree.manifest-digest` +/// +/// External regular files are written directly to disk with FICLONE (reflink) +/// attempted first, falling back to a byte copy. On the same filesystem this +/// avoids copying data entirely. +pub fn import_from_composefs_repo( + ostree_repo: &crate::ostree::Repo, + composefs_repo: &ComposefsRepo, + config_digest: &composefs_oci::OciDigest, + manifest_digest: &str, + image_manifest: &crate::oci_spec::image::ImageManifest, + image_config: &crate::oci_spec::image::ImageConfiguration, + reflinks_required: bool, +) -> Result { + let cancellable = gio::Cancellable::NONE; + + // Reconstruct the merged filesystem tree from the composefs store + let mut fs = composefs_oci::image::create_filesystem(composefs_repo, config_digest, None) + .context("Reconstructing composefs filesystem tree")?; + + // Apply SELinux labels in-memory before writing any objects. + // selabel walks the FileSystem tree and reads policy files directly from + // the composefs object store — no file copies are made. + let labeled = selabel(&mut fs, composefs_repo) + .context("Applying SELinux labels from composefs image policy")?; + tracing::debug!( + labeled, + total_leaves = fs.leaves.len(), + "SELinux labeling complete" + ); + + let txn = ostree_repo + .auto_transaction(cancellable) + .context("Beginning ostree transaction")?; + + let mut ctx = DirectImportContext { + ficlone_failed: false, + reflinked: 0, + copied: 0, + existing: 0, + reflinks_required, + }; + + // Walk and write all objects. + // + // The composefs filesystem tree uses the OCI image layout where `/etc` + // lives at the tree root. However, ostree's deployment mechanism expects + // configuration to be at `usr/etc` (it copies `usr/etc` → `etc` during + // deployment initialization and performs a 3-way merge). The old + // `ImageImporter` tar pipeline applies this `/etc` → `usr/etc` remap + // via `normalize_validate_path()` in `tar/write.rs`. + // + // We replicate that remapping here: if the composefs root has an `etc` + // directory, we write it into the ostree tree as `usr/etc` instead of + // a top-level `etc`. + let root_mtree = crate::ostree::MutableTree::new(); + write_dir_to_mtree_remap_etc( + ostree_repo, + composefs_repo, + &fs.root, + &fs.leaves, + &root_mtree, + &mut ctx, + ) + .context("Writing composefs tree into ostree")?; + + tracing::info!( + reflinked = ctx.reflinked, + copied = ctx.copied, + existing = ctx.existing, + "Composefs→ostree import complete: reflinked={} copied={} existing={}", + ctx.reflinked, + ctx.copied, + ctx.existing, + ); + + // Seal the tree + let root = ostree_repo + .write_mtree(&root_mtree, cancellable) + .context("Sealing MutableTree")?; + let root = root + .downcast::() + .map_err(|_| anyhow::anyhow!("BUG: write_mtree did not return a RepoFile"))?; + + // Attach OCI metadata so the commit looks like a normal ostree-container import, + // plus a flag indicating this commit was synthesized from a composefs repository + // (so per-layer blob refs do not exist). + let meta = glib::VariantDict::new(None); + meta.insert( + META_CONFIG, + &serde_json::to_string(image_config).context("Serializing image config")?, + ); + meta.insert( + META_MANIFEST, + &serde_json::to_string(image_manifest).context("Serializing manifest")?, + ); + meta.insert(META_MANIFEST_DIGEST, &manifest_digest); + meta.insert(crate::ostree::METADATA_KEY_BOOTABLE.as_ref(), &true); + meta.insert(META_COMPOSEFS_SYNTHESIZED, &true); + let meta = meta.to_variant(); + + // Use the image creation timestamp for the ostree commit, matching the + // behaviour of the old ImageImporter pipeline. This is important because + // rpm-ostree unconditionally expects a non-zero timestamp in the deployment + // metadata; a zero (epoch) timestamp can cause `rpm-ostree status` to abort. + let timestamp = + crate::container::store::timestamp_of_manifest_or_config(image_manifest, image_config) + .unwrap_or_else(|| chrono::offset::Utc::now().timestamp() as u64); + + let commit = ostree_repo + .write_commit_with_time(None, None, None, Some(&meta), &root, timestamp, cancellable) + .context("Writing ostree commit")?; + + txn.commit(cancellable).context("Committing transaction")?; + + Ok(commit.to_string()) +} + +#[cfg(test)] +mod tests { + use rustix::fd::AsRawFd as _; + use rustix::fs::{AtFlags, Mode, OFlags, linkat, openat}; + use std::os::fd::AsFd as _; + + /// Verify that O_TMPFILE can be materialized via /proc/self/fd/N without + /// root (the linkat+SYMLINK_FOLLOW approach used by write_external_file_direct). + #[test] + fn test_otmpfile_materialize_via_proc() { + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let dir_fd = rustix::fs::open( + dir.path(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .unwrap(); + + // Create O_TMPFILE — anonymous inode in the directory + let tmp_fd = openat( + &dir_fd, + c".", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .expect("O_TMPFILE"); + + rustix::io::write(&tmp_fd, b"hello").unwrap(); + + // Materialize via /proc/self/fd/N with SYMLINK_FOLLOW + let proc_path = format!("/proc/self/fd/{}", tmp_fd.as_fd().as_raw_fd()); + linkat( + rustix::fs::CWD, + proc_path.as_str(), + &dir_fd, + "materialized.txt", + AtFlags::SYMLINK_FOLLOW, + ) + .expect("linkat via /proc/self/fd should work without root"); + + let contents = std::fs::read(dir.path().join("materialized.txt")).unwrap(); + assert_eq!(contents, b"hello"); + } + + /// Verify FICLONE (reflink) works between two files in /var/tmp. + /// This will return EOPNOTSUPP on filesystems that don't support it + /// (e.g. ext4), which is acceptable — we just need to confirm the + /// ioctl itself is reachable and returns a meaningful error code. + #[test] + fn test_ficlone_or_enotsup() { + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let dir_fd = rustix::fs::open( + dir.path(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .unwrap(); + + // Source file with some content + let src_fd = openat( + &dir_fd, + c"src", + OFlags::RDWR | OFlags::CREATE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .unwrap(); + rustix::io::write(&src_fd, b"reflink-test-content").unwrap(); + + // Destination — O_TMPFILE + let dst_fd = openat( + &dir_fd, + c".", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .unwrap(); + + match rustix::fs::ioctl_ficlone(&dst_fd, &src_fd) { + Ok(()) => { + // Reflinks supported — verify content matches + let proc_path = format!("/proc/self/fd/{}", dst_fd.as_fd().as_raw_fd()); + linkat( + rustix::fs::CWD, + proc_path.as_str(), + &dir_fd, + "dst", + AtFlags::SYMLINK_FOLLOW, + ) + .unwrap(); + let contents = std::fs::read(dir.path().join("dst")).unwrap(); + assert_eq!(contents, b"reflink-test-content"); + eprintln!("FICLONE: supported on this filesystem"); + } + Err(rustix::io::Errno::OPNOTSUPP) | Err(rustix::io::Errno::XDEV) => { + eprintln!( + "FICLONE: not supported on this filesystem (EOPNOTSUPP/EXDEV) — fallback to copy is correct" + ); + } + Err(e) => panic!("Unexpected FICLONE error: {e}"), + } + } + + /// Verify that user.ostreemeta xattr can be set on a regular file + /// without root in /var/tmp. + #[test] + fn test_user_ostreemeta_xattr() { + use rustix::fs::{XattrFlags, fgetxattr, fsetxattr}; + + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let f = tempfile::NamedTempFile::new_in(dir.path()).unwrap(); + let fd = rustix::fs::open(f.path(), OFlags::RDWR | OFlags::CLOEXEC, Mode::empty()).unwrap(); + + let meta_bytes = b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xed\x00\x00\x00\x00"; + fsetxattr(&fd, c"user.ostreemeta", meta_bytes, XattrFlags::empty()) + .expect("setting user.ostreemeta should work without root"); + + let mut buf = vec![0u8; 256]; + let n = fgetxattr(&fd, c"user.ostreemeta", &mut buf).unwrap(); + assert_eq!(&buf[..n], meta_bytes); + } +} diff --git a/crates/ostree-ext/src/container/mod.rs b/crates/ostree-ext/src/container/mod.rs index 0fec7f757..df175448b 100644 --- a/crates/ostree-ext/src/container/mod.rs +++ b/crates/ostree-ext/src/container/mod.rs @@ -613,6 +613,7 @@ pub fn version_for_config(config: &oci_spec::image::ImageConfiguration) -> Optio None } +pub mod composefs_import; pub mod deploy; mod encapsulate; pub use encapsulate::*; diff --git a/crates/ostree-ext/src/container/store.rs b/crates/ostree-ext/src/container/store.rs index ea0665d40..0df3d67c5 100644 --- a/crates/ostree-ext/src/container/store.rs +++ b/crates/ostree-ext/src/container/store.rs @@ -137,6 +137,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use canon_json::CanonJsonSerialize; use cap_std_ext::cap_std; use cap_std_ext::cap_std::fs::{Dir, MetadataExt}; +use gvariant::{Marker as _, Structure as _}; use cap_std_ext::dirext::CapStdExtDirExt; use containers_image_proxy::{ImageProxy, OpenedImage}; @@ -172,12 +173,17 @@ const IMAGE_PREFIX: &str = "ostree/container/image"; /// ref with the project name, so the final ref may be of the form e.g. `ostree/container/baseimage/bootc/foo`. pub const BASE_IMAGE_PREFIX: &str = "ostree/container/baseimage"; -/// The key injected into the merge commit for the manifest digest. -pub(crate) const META_MANIFEST_DIGEST: &str = "ostree.manifest-digest"; -/// The key injected into the merge commit with the manifest serialized as JSON. -const META_MANIFEST: &str = "ostree.manifest"; -/// The key injected into the merge commit with the image configuration serialized as JSON. -const META_CONFIG: &str = "ostree.container.image-config"; +/// The key for the manifest digest in ostree commit metadata. +pub const META_MANIFEST_DIGEST: &str = "ostree.manifest-digest"; +/// The key for the serialized OCI manifest in ostree commit metadata. +pub const META_MANIFEST: &str = "ostree.manifest"; +/// The key for the serialized OCI image configuration in ostree commit metadata. +pub const META_CONFIG: &str = "ostree.container.image-config"; +/// Commit metadata key written by the composefs import path to indicate this commit +/// was synthesized from a composefs repository rather than imported layer-by-layer. +/// When present, per-layer blob refs do not exist and `query_image_commit` should +/// use the merge commit itself as the base commit. +pub const META_COMPOSEFS_SYNTHESIZED: &str = "ostree-ext.composefs-synthesized"; /// The type used to store content filtering information. pub type MetaFilteredData = HashMap>; @@ -197,7 +203,7 @@ fn ref_for_layer(l: &oci_image::Descriptor) -> Result { } /// Convert e.g. sha256:12345... into `/ostree/container/blob/sha256_2B12345...`. -fn ref_for_image(l: &ImageReference) -> Result { +pub fn ref_for_image(l: &ImageReference) -> Result { refescape::prefix_escape_for_ref(IMAGE_PREFIX, &l.to_string()) } @@ -585,7 +591,7 @@ pub(crate) fn parse_ostree_manifest_layout<'a>( } /// Find the timestamp of the manifest (or config), ignoring errors. -fn timestamp_of_manifest_or_config( +pub(crate) fn timestamp_of_manifest_or_config( manifest: &ImageManifest, config: &ImageConfiguration, ) -> Option { @@ -1624,6 +1630,58 @@ pub fn list_images(repo: &ostree::Repo) -> Result> { .collect() } +/// Recursively walk a dirtree GVariant, calling `f` with each file's hex content checksum. +/// +/// Reads `(a(say)a(sayay))` — the ostree DIRTREE format — directly from GVariant bytes, +/// the same approach used by [`crate::ima`]. Subdirectories are visited depth-first. +fn walk_dirtree_checksums( + repo: &ostree::Repo, + dirtree_checksum: &str, + f: &mut impl FnMut(&str) -> Result<()>, +) -> Result<()> { + use crate::objgv::gv_dirtree; + use gvariant::aligned_bytes::TryAsAligned as _; + + let v = repo.load_variant(ostree::ObjectType::DirTree, dirtree_checksum)?; + let bytes = v.data_as_bytes(); + let bytes = bytes.try_as_aligned()?; + let tree = gv_dirtree!().cast(bytes); + let (files, dirs) = tree.to_tuple(); + + let mut hexbuf = [0u8; 64]; + for file in files { + let (_name, csum) = file.to_tuple(); + hex::encode_to_slice(csum, &mut hexbuf)?; + f(std::str::from_utf8(&hexbuf)?)?; + } + for dir in dirs { + let (_name, contents_csum, _meta_csum) = dir.to_tuple(); + hex::encode_to_slice(contents_csum, &mut hexbuf)?; + walk_dirtree_checksums(repo, std::str::from_utf8(&hexbuf)?, f)?; + } + Ok(()) +} + +/// Call `f` once for each regular-file content checksum (hex SHA-256) reachable from +/// `commit_checksum`, reading dirtree GVariants directly without GObject overhead. +/// +/// Subtrees are visited depth-first. Shared dirtree objects that appear more than once +/// in the tree will be visited multiple times. +pub fn for_each_file_checksum( + repo: &ostree::Repo, + commit_checksum: &str, + f: &mut impl FnMut(&str) -> Result<()>, +) -> Result<()> { + use crate::objgv::gv_commit; + use gvariant::aligned_bytes::TryAsAligned as _; + + let (commit_v, _) = repo.load_commit(commit_checksum)?; + let commit_bytes = commit_v.data_as_bytes(); + let commit_bytes = commit_bytes.try_as_aligned()?; + let root_dirtree = hex::encode(gv_commit!().cast(commit_bytes).to_tuple().6); + walk_dirtree_checksums(repo, &root_dirtree, f) +} + /// Attempt to query metadata for a pulled image; if it is corrupted, /// the error is printed to stderr and None is returned. fn try_query_image( @@ -1745,11 +1803,21 @@ pub fn query_image_commit(repo: &ostree::Repo, commit: &str) -> Result(META_COMPOSEFS_SYNTHESIZED)? + .unwrap_or(false); + let base_commit = if is_composefs_synthesized { + merge_commit.clone() + } else { + let base_layer = query_layer(repo, base_layer)?; + let ostree_ref = base_layer.ostree_ref.as_str(); + base_layer + .commit + .ok_or_else(|| anyhow!("Missing base image ref {ostree_ref}"))? + }; let detached_commitmeta = repo.read_commit_detached_metadata(&merge_commit, gio::Cancellable::NONE)?; diff --git a/crates/ostree-ext/tests/it/main.rs b/crates/ostree-ext/tests/it/main.rs index 14c6dd3b3..3b7531564 100644 --- a/crates/ostree-ext/tests/it/main.rs +++ b/crates/ostree-ext/tests/it/main.rs @@ -2365,3 +2365,230 @@ async fn test_diff_id_reuse_across_compression() -> Result<()> { Ok(()) } + +/// Build a minimal OCI image (single gzip layer) into the given OCI directory. +/// +/// The tar layer explicitly includes parent directory entries before files, so +/// that composefs's strict layer importer does not choke on missing parents. +fn build_test_oci(oci_dir: &ocidir::OciDir) -> Result<()> { + use ocidir::oci_spec::image::ImageConfigurationBuilder; + let mut config = ImageConfigurationBuilder::default().build()?; + let mut manifest = oci_dir.new_empty_manifest()?.build()?; + + let bw = oci_dir.create_gzip_layer(None)?; + let mut tb = tar::Builder::new(bw); + + let dir_header = |path: &str| -> tar::Header { + let mut h = tar::Header::new_ustar(); + h.set_entry_type(tar::EntryType::Directory); + h.set_mode(0o755); + h.set_uid(0); + h.set_gid(0); + h.set_mtime(0); + h.set_size(0); + // The tar path for directories must end in / + let _ = path; // consumed by caller + h + }; + let reg_header = |data: &[u8]| -> tar::Header { + let mut h = tar::Header::new_ustar(); + h.set_entry_type(tar::EntryType::Regular); + h.set_mode(0o644); + h.set_uid(0); + h.set_gid(0); + h.set_mtime(0); + h.set_size(data.len() as u64); + h + }; + + // Parent directories must come before their children. + let mut dh = dir_header("usr/"); + tb.append_data(&mut dh, "usr/", std::io::empty())?; + let mut dh = dir_header("usr/bin/"); + tb.append_data(&mut dh, "usr/bin/", std::io::empty())?; + let mut dh = dir_header("usr/lib/"); + tb.append_data(&mut dh, "usr/lib/", std::io::empty())?; + + let bash_data = b"#!/bin/bash\n"; + let mut fh = reg_header(bash_data); + tb.append_data(&mut fh, "usr/bin/bash", std::io::Cursor::new(bash_data))?; + + let config_data = b"# test config\n"; + let mut fh = reg_header(config_data); + tb.append_data( + &mut fh, + "usr/lib/test.conf", + std::io::Cursor::new(config_data), + )?; + + // Add /etc at the OCI image root — this must be remapped to usr/etc + // in the ostree commit so that deployment initialization can find it. + let mut dh = dir_header("etc/"); + tb.append_data(&mut dh, "etc/", std::io::empty())?; + + let hostname_data = b"testhost\n"; + let mut fh = reg_header(hostname_data); + tb.append_data(&mut fh, "etc/hostname", std::io::Cursor::new(hostname_data))?; + + let bw = tb.into_inner()?; + let layer = bw.complete()?; + + oci_dir.push_layer_full( + &mut manifest, + &mut config, + layer, + None::>, + "test layer", + chrono::DateTime::UNIX_EPOCH, + ); + let config_desc = oci_dir.write_config(config)?; + manifest.set_config(config_desc); + oci_dir.replace_with_single_manifest(manifest, ocidir::oci_spec::image::Platform::default())?; + Ok(()) +} + +/// Test that we can synthesize an ostree commit from a composefs repository. +/// +/// The flow is: +/// 1. Build a minimal OCI image into a local OCI directory +/// 2. Pull it into a fresh composefs repository +/// 3. Call `composefs_import::import_from_composefs_repo` +/// 4. Verify the resulting ostree commit has the expected OCI metadata and files +#[tokio::test] +async fn test_composefs_import_to_ostree() -> Result<()> { + use composefs_ctl::composefs::fsverity::{Algorithm, DEFAULT_LG_BLOCKSIZE, Sha512HashValue}; + use composefs_ctl::composefs::repository::Repository; + use composefs_ctl::composefs_oci; + use composefs_ctl::composefs_oci::oci_image::OciImage; + use ostree_ext::container::composefs_import; + use ostree_ext::container::store::{META_CONFIG, META_MANIFEST, META_MANIFEST_DIGEST}; + use std::sync::Arc; + + if !check_skopeo() { + eprintln!("Skipping test_composefs_import_to_ostree: skopeo not available"); + return Ok(()); + } + + let tempdir = tempfile::tempdir_in("/var/tmp")?; + let path: &camino::Utf8Path = tempdir.path().try_into().unwrap(); + + // Step 1: build a minimal OCI image + let oci_path = path.join("test.oci"); + std::fs::create_dir(&oci_path)?; + let oci_cap_dir = cap_std::fs::Dir::open_ambient_dir(&oci_path, cap_std::ambient_authority())?; + let oci_dir = ocidir::OciDir::ensure(oci_cap_dir)?; + build_test_oci(&oci_dir)?; + + // Step 2: create a composefs repository and pull the OCI image into it + let cfs_path = path.join("composefs-repo"); + std::fs::create_dir(&cfs_path)?; + let cfs_dirfd = cap_std::fs::Dir::open_ambient_dir(&cfs_path, cap_std::ambient_authority())?; + let algo = Algorithm::Sha512 { + lg_blocksize: DEFAULT_LG_BLOCKSIZE, + }; + let (cfs_repo, _) = Repository::::init_path(cfs_dirfd, ".", algo, false) + .context("Initializing composefs repository")?; + let cfs_repo = Arc::new(cfs_repo); + + let oci_imgref = format!("oci:{}", oci_path); + let pull_result = composefs_oci::pull(&cfs_repo, &oci_imgref, None, Default::default()) + .await + .context("Pulling OCI image into composefs repo")?; + + // Step 3: get manifest + config from the composefs repo + let oci_image = OciImage::open(&cfs_repo, &pull_result.manifest_digest, None) + .context("Opening OCI image from composefs repo")?; + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .context("OCI image has no config")?; + + // Step 4: create a destination ostree repo and run the import + let ost_repo = ostree::Repo::new(&gio::File::for_path(path.join("ostree-repo"))); + ost_repo + .create(ostree::RepoMode::BareUser, gio::Cancellable::NONE) + .context("Creating destination ostree repo")?; + + let manifest_digest_str = pull_result.manifest_digest.to_string(); + let commit = composefs_import::import_from_composefs_repo( + &ost_repo, + &cfs_repo, + &pull_result.config_digest, + &manifest_digest_str, + &manifest, + &config, + false, + ) + .context("Importing composefs repo into ostree")?; + + // Step 5: verify + assert!(!commit.is_empty(), "commit checksum should be non-empty"); + + let (commitv, state) = ost_repo.load_commit(&commit)?; + assert_eq!(state, ostree::RepoCommitState::NORMAL); + + // Commit metadata should contain the three OCI keys + let commit_meta = commitv.child_value(0); + let commitmeta = glib::VariantDict::new(Some(&commit_meta)); + assert!( + commitmeta + .lookup_value(META_MANIFEST_DIGEST, None) + .is_some(), + "commit should have manifest-digest metadata" + ); + assert!( + commitmeta.lookup_value(META_MANIFEST, None).is_some(), + "commit should have manifest metadata" + ); + assert!( + commitmeta.lookup_value(META_CONFIG, None).is_some(), + "commit should have config metadata" + ); + // The stored digest should round-trip + let stored_digest = commitmeta + .lookup::(META_MANIFEST_DIGEST)? + .expect("manifest-digest key"); + assert_eq!(stored_digest, manifest_digest_str); + + // The file tree should have the files we put in the layer + let (root, _) = ost_repo.read_commit(&commit, gio::Cancellable::NONE)?; + for path in ["usr", "usr/bin", "usr/bin/bash", "usr/lib/test.conf"] { + let node = root.resolve_relative_path(path); + assert!( + node.query_exists(gio::Cancellable::NONE), + "expected {path} to exist in the ostree commit" + ); + } + + // Verify /etc was remapped to usr/etc (not left at the root as etc/) + let usr_etc_hostname = root.resolve_relative_path("usr/etc/hostname"); + assert!( + usr_etc_hostname.query_exists(gio::Cancellable::NONE), + "expected usr/etc/hostname to exist (remapped from /etc/hostname)" + ); + + // Verify there is NO top-level etc/ — it should have been remapped + let root_etc = root.resolve_relative_path("etc"); + assert!( + !root_etc.query_exists(gio::Cancellable::NONE), + "top-level etc/ should not exist; it should be remapped to usr/etc/" + ); + + // Run `ostree fsck` against the repo to verify all objects pass integrity + // checks — in particular that file content checksums match, that + // user.ostreemeta xattrs are present and well-formed, and that no + // object is corrupted. + let ostree_repo_path = path.join("ostree-repo"); + let st = std::process::Command::new("ostree") + .args(["fsck", "--repo"]) + .arg(&ostree_repo_path) + .status() + .context("running `ostree fsck`")?; + assert!( + st.success(), + "`ostree fsck` failed on the composefs-imported repo" + ); + + Ok(()) +} diff --git a/crates/xtask/src/bcvk.rs b/crates/xtask/src/bcvk.rs index 7d883d205..55975688a 100644 --- a/crates/xtask/src/bcvk.rs +++ b/crates/xtask/src/bcvk.rs @@ -65,15 +65,15 @@ impl BcvkInstallOpts { /// Return the install-related args for `bcvk libvirt run`. /// /// This covers `--composefs-backend`, `--filesystem`, `--bootloader`, - /// and `--karg` flags. Note that `--bootloader` and `--filesystem` - /// are only valid when `--composefs-backend` is also set (bcvk - /// enforces this via a clap `requires` relationship). + /// and `--karg` flags. `--bootloader` is composefs-only, but + /// `--filesystem` applies to both ostree and composefs backends. pub(crate) fn install_args(&self) -> Vec { let mut args = Vec::new(); + if let Some(fs) = &self.filesystem { + args.push(format!("--filesystem={fs}")); + } if self.composefs_backend { args.push("--composefs-backend".into()); - let fs = self.filesystem.as_deref().unwrap_or("ext4"); - args.push(format!("--filesystem={fs}")); if let Some(b) = &self.bootloader { args.push(format!("--bootloader={b}")); } diff --git a/crates/xtask/src/tmt.rs b/crates/xtask/src/tmt.rs index 0152285ef..505790a44 100644 --- a/crates/xtask/src/tmt.rs +++ b/crates/xtask/src/tmt.rs @@ -19,6 +19,7 @@ const COMMON_INST_ARGS: &[&str] = &["--label=bootc.test=1"]; // Metadata field names const FIELD_TRY_BIND_STORAGE: &str = "try_bind_storage"; +const FIELD_BOOTC_INSTALL_ARGS: &str = "bootc_install_args"; const FIELD_SUMMARY: &str = "summary"; const FIELD_ADJUST: &str = "adjust"; @@ -212,6 +213,7 @@ struct PlanMetadata { try_bind_storage: bool, skip_if_composefs: bool, skip_if_uki: bool, + bootc_install_args: Vec, } /// Parse integration.fmf to extract extra-try_bind_storage for all plans @@ -254,6 +256,7 @@ fn parse_plan_metadata( try_bind_storage: b, skip_if_uki: false, skip_if_composefs: false, + bootc_install_args: Vec::new(), }); } } @@ -270,6 +273,7 @@ fn parse_plan_metadata( skip_if_composefs: b, skip_if_uki: false, try_bind_storage: false, + bootc_install_args: Vec::new(), }); } } @@ -286,6 +290,25 @@ fn parse_plan_metadata( skip_if_uki: b, skip_if_composefs: false, try_bind_storage: false, + bootc_install_args: Vec::new(), + }); + } + } + + if let Some(install_args_val) = plan_data.get(&serde_yaml::Value::String(format!( + "extra-{}", + FIELD_BOOTC_INSTALL_ARGS + ))) { + if let Some(s) = install_args_val.as_str() { + let args: Vec = s.split_whitespace().map(|a| a.to_string()).collect(); + plan_metadata + .entry(plan_name.to_string()) + .and_modify(|m| m.bootc_install_args = args.clone()) + .or_insert(PlanMetadata { + bootc_install_args: args, + skip_if_uki: false, + skip_if_composefs: false, + try_bind_storage: false, }); } } @@ -478,6 +501,17 @@ pub(crate) fn run_tmt(sh: &Shell, args: &RunTmtArgs) -> Result<()> { } } + // Add per-plan bootc install args as --install-arg= + let bootc_install_args = plan_metadata + .iter() + .find(|(key, _)| plan.ends_with(key.as_str())) + .map(|(_, v)| v.bootc_install_args.clone()) + .unwrap_or_default(); + + for arg in &bootc_install_args { + opts.push(format!("--install-arg={arg}")); + } + opts.extend(bcvk_opts.install_args()); opts @@ -908,6 +942,8 @@ struct TestDef { skip_if_composefs: bool, /// Whether to skip this test for images with UKI skip_if_uki: bool, + /// Extra bootc install args to pass via --install-arg= + bootc_install_args: Vec, /// TMT fmf attributes to pass through (summary, duration, adjust, etc.) tmt: serde_yaml::Value, } @@ -1088,6 +1124,18 @@ fn generate_integration() -> Result<(String, String)> { .and_then(|v| v.as_bool()) .unwrap_or(false); + let bootc_install_args: Vec = metadata + .extra + .as_mapping() + .and_then(|m| { + m.get(&serde_yaml::Value::String( + FIELD_BOOTC_INSTALL_ARGS.to_string(), + )) + }) + .and_then(|v| v.as_str()) + .map(|s| s.split_whitespace().map(|a| a.to_string()).collect()) + .unwrap_or_default(); + tests.push(TestDef { number: metadata.number, name: display_name, @@ -1095,6 +1143,7 @@ fn generate_integration() -> Result<(String, String)> { try_bind_storage, skip_if_composefs, skip_if_uki, + bootc_install_args, tmt: metadata.tmt, }); } @@ -1224,6 +1273,13 @@ fn generate_integration() -> Result<(String, String)> { ); } + if !test.bootc_install_args.is_empty() { + plan_value.insert( + serde_yaml::Value::String(format!("extra-{}", FIELD_BOOTC_INSTALL_ARGS)), + serde_yaml::Value::String(test.bootc_install_args.join(" ")), + ); + } + plans_mapping.insert( serde_yaml::Value::String(plan_key), serde_yaml::Value::Mapping(plan_value), @@ -1362,6 +1418,47 @@ set -eux assert!(tmt.contains_key(&serde_yaml::Value::String("adjust".to_string()))); } + #[test] + fn test_parse_plan_metadata_bootc_install_args() { + // Simulate the integration.fmf content for plan-44 + let fmf_content = r#" +/plan-44-readonly-unified: + summary: Run readonly tests after onboarding to unified storage + discover: + how: fmf + test: + - /tmt/tests/tests/test-44-readonly-unified + extra-bootc_install_args: --experimental-unified-storage +"#; + let tmp = tempfile::NamedTempFile::new().unwrap(); + std::fs::write(tmp.path(), fmf_content).unwrap(); + let path = camino::Utf8Path::from_path(tmp.path()).unwrap(); + + let metadata = parse_plan_metadata(path).unwrap(); + let plan_meta = metadata.get("/plan-44-readonly-unified").unwrap(); + assert_eq!( + plan_meta.bootc_install_args, + vec!["--experimental-unified-storage"] + ); + assert!(!plan_meta.try_bind_storage); + + // Simulate the plan name lookup as done in the execution loop + let plan = "/tmt/plans/integration/plan-44-readonly-unified"; + let found = metadata + .iter() + .find(|(key, _)| plan.ends_with(key.as_str())) + .map(|(_, v)| v.bootc_install_args.clone()) + .unwrap_or_default(); + assert_eq!(found, vec!["--experimental-unified-storage"]); + + // Verify the opts vec gets --install-arg= entries + let mut opts: Vec = vec![]; + for arg in &found { + opts.push(format!("--install-arg={arg}")); + } + assert_eq!(opts, vec!["--install-arg=--experimental-unified-storage"]); + } + #[test] fn test_parse_tmt_metadata_with_try_bind_storage() { let content = r#"# number: 24 diff --git a/docs/src/experimental-unified-storage.md b/docs/src/experimental-unified-storage.md index 68d5ea142..04078615c 100644 --- a/docs/src/experimental-unified-storage.md +++ b/docs/src/experimental-unified-storage.md @@ -148,12 +148,34 @@ Once unified storage is enabled, podman can access the booted image: podman --storage-opt=additionalimagestore=/usr/lib/bootc/storage run localhost/bootc ``` -## Relationship to composefs backend - -Unified storage is complementary to the [composefs backend](experimental-composefs.md). -While unified storage changes *how images are pulled* (using containers/storage), -the composefs backend changes *how the filesystem is stored and verified*. -These features can potentially be combined in the future. +## Relationship to the composefs-first import pipeline + +The containers/storage unified storage path described above is distinct from the +composefs-first import pipeline implemented in this codebase. The two approaches +solve overlapping problems but are architecturally different. + +The composefs-first path fetches the OCI image directly into the composefs OCI +repository (under `/sysroot/ostree/repo/composefs`) and then synthesizes an ostree +commit from that repository without a tar round-trip. For each external regular +file it uses `FICLONE` (reflink) to populate the corresponding ostree content object +from the composefs object. Because reflinks share underlying disk extents while +keeping independent inodes, each ostree object can carry its own metadata (uid, gid, +mode, SELinux label) even when two paths have identical bytes and share a single +composefs object. This is why reflink is used instead of hardlink: a single inode +can hold only one `security.selinux` value, so hardlinking would corrupt one of the +labels. + +This design requires a reflink-capable filesystem. On XFS or btrfs, FICLONE +succeeds and the ostree and composefs repositories share disk blocks — no extra +space is consumed for the ostree content objects. On ext4, FICLONE fails with +`EOPNOTSUPP` and the code falls back to a byte copy; installation works correctly +but without block sharing between the two repositories. + +SELinux labels are applied in-memory (via `selabel()` from composefs-rs) before +the ostree checksums are computed. On a container host where SELinux is disabled, +the `fsetxattr` call is a no-op; the kernel applies matching labels at first boot +during auto-relabeling. Because the checksums include the correct label values, +`ostree fsck` passes after boot without any special handling at install time. ## Limitations diff --git a/docs/src/host-v1.schema.json b/docs/src/host-v1.schema.json index db716c3f1..f6213ae3d 100644 --- a/docs/src/host-v1.schema.json +++ b/docs/src/host-v1.schema.json @@ -345,6 +345,17 @@ } ] }, + "storage": { + "description": "Storage subsystem state.", + "anyOf": [ + { + "$ref": "#/$defs/StorageStatus" + }, + { + "type": "null" + } + ] + }, "type": { "description": "The detected type of system", "anyOf": [ @@ -509,6 +520,19 @@ } } }, + "StorageStatus": { + "description": "Storage subsystem status.", + "type": "object", + "properties": { + "unified": { + "description": "The state of unified block-sharing storage.", + "$ref": "#/$defs/UnifiedStorageState" + } + }, + "required": [ + "unified" + ] + }, "Store": { "description": "The container storage backend", "oneOf": [ @@ -518,6 +542,26 @@ "const": "ostreeContainer" } ] + }, + "UnifiedStorageState": { + "description": "The state of unified storage (composefs+ostree shared block storage).\n\nSerialized as kebab-case strings (`\"disabled\"`, `\"enabled\"`, `\"enabled-with-copy\"`)\nto match the `[install.storage] unified` install config values.", + "oneOf": [ + { + "description": "Unified storage is not enabled; composefs and ostree repos do not share blocks.", + "type": "string", + "const": "disabled" + }, + { + "description": "Unified storage is enabled; images are stored via reflinks (zero physical copy).", + "type": "string", + "const": "enabled" + }, + { + "description": "Unified storage is enabled but may use byte copies when reflinks are unavailable.\n\nCorresponds to `storage.unified = \"enabled-with-copy\"` in the install config,\nor to systems onboarded via `bootc image set-unified` (which always allows\ncopies since it runs at runtime rather than at install time).", + "type": "string", + "const": "enabled-with-copy" + } + ] } } } diff --git a/docs/src/man/bootc-install-to-disk.8.md b/docs/src/man/bootc-install-to-disk.8.md index 5b4a7bd39..c0374b40d 100644 --- a/docs/src/man/bootc-install-to-disk.8.md +++ b/docs/src/man/bootc-install-to-disk.8.md @@ -119,7 +119,7 @@ set `discoverable-partitions = true` in their install configuration **--skip-fetch-check** - Verify the image can be fetched from the bootc image. Updates may fail when the installation host is authenticated with the registry but the pull secret is not in the bootc image + Skip the fetch check (inverse of `--run-fetch-check`). When true, the pre-install registry reachability probe is skipped. Useful when the caller has already verified connectivity or when installing in an air-gapped environment **--disable-selinux** diff --git a/docs/src/man/bootc-install-to-existing-root.8.md b/docs/src/man/bootc-install-to-existing-root.8.md index 4b4f8925d..06645d9d3 100644 --- a/docs/src/man/bootc-install-to-existing-root.8.md +++ b/docs/src/man/bootc-install-to-existing-root.8.md @@ -165,7 +165,7 @@ of migrating the fstab entries. See the "Injecting kernel arguments" section abo **--skip-fetch-check** - Verify the image can be fetched from the bootc image. Updates may fail when the installation host is authenticated with the registry but the pull secret is not in the bootc image + Skip the fetch check (inverse of `--run-fetch-check`). When true, the pre-install registry reachability probe is skipped. Useful when the caller has already verified connectivity or when installing in an air-gapped environment **--disable-selinux** diff --git a/docs/src/man/bootc-install-to-filesystem.8.md b/docs/src/man/bootc-install-to-filesystem.8.md index f46892de3..950c136b8 100644 --- a/docs/src/man/bootc-install-to-filesystem.8.md +++ b/docs/src/man/bootc-install-to-filesystem.8.md @@ -73,7 +73,7 @@ is currently expected to be empty by default. **--skip-fetch-check** - Verify the image can be fetched from the bootc image. Updates may fail when the installation host is authenticated with the registry but the pull secret is not in the bootc image + Skip the fetch check (inverse of `--run-fetch-check`). When true, the pre-install registry reachability probe is skipped. Useful when the caller has already verified connectivity or when installing in an air-gapped environment **--disable-selinux** diff --git a/tmt/plans/integration.fmf b/tmt/plans/integration.fmf index b3366648b..e319885ca 100644 --- a/tmt/plans/integration.fmf +++ b/tmt/plans/integration.fmf @@ -137,14 +137,6 @@ execute: - /tmt/tests/tests/test-29-soft-reboot-selinux-policy extra-fixme_skip_if_uki: true -/plan-30-install-unified-flag: - summary: Test bootc install with experimental unified storage flag - discover: - how: fmf - test: - - /tmt/tests/tests/test-30-install-unified-flag - extra-fixme_skip_if_composefs: true - /plan-31-switch-mutate-in-place: summary: switch --mutate-in-place discover: @@ -270,10 +262,26 @@ execute: test: - /tmt/tests/tests/test-43-switch-same-digest +/plan-44-readonly-unified: + summary: Readonly tests after onboarding to unified storage via set-unified + discover: + how: fmf + test: + - /tmt/tests/tests/test-44-readonly-unified + /plan-44-shadow-fixup: summary: Test bootc-sysusers-shadow-sync removes orphaned gshadow entries before sysusers discover: how: fmf test: - /tmt/tests/tests/test-44-shadow-fixup + +/plan-45-install-unified-flag: + summary: Readonly tests after install with --experimental-unified-storage + discover: + how: fmf + test: + - /tmt/tests/tests/test-45-install-unified-flag + extra-fixme_skip_if_composefs: true + extra-bootc_install_args: --experimental-unified-storage # END GENERATED PLANS diff --git a/tmt/tests/booted/bootc_testlib.nu b/tmt/tests/booted/bootc_testlib.nu index 2560d9b13..12ab4409b 100644 --- a/tmt/tests/booted/bootc_testlib.nu +++ b/tmt/tests/booted/bootc_testlib.nu @@ -1,5 +1,59 @@ # A simple nushell "library" for bootc test helpers +# Run standard per-boot checks that should pass on every boot in every test. +# +# Call this at the start of every boot function (first_boot, second_boot, etc.) +# to ensure consistent baseline verification across all tests. Currently runs: +# +# - `bootc internals fsck images`: verifies that every image bootc has +# committed to (composefs GC tag) is also present in containers-storage. +# This is a no-op when unified storage is not enabled, so it is always +# safe to call regardless of the system configuration. +export def initial_status_and_checks [] { + bootc internals fsck images +} + +# Asserts the system is in a unified storage state and performs common verifications +export def verify_unified_storage [] { + use std assert + + # 1. Asserts `/usr/lib/bootc/storage` path exists + assert ("/usr/lib/bootc/storage" | path exists) "/usr/lib/bootc/storage must exist" + + # 2. Asserts `bootc image list --format json | from json | where image_type == "unified"` has at least 1 entry + let images = bootc image list --format json | from json + let unified_images = $images | where image_type == "unified" + assert (($unified_images | length) > 0) "Expected at least one image with type 'unified'" + + # 3. Asserts `/sysroot/composefs/bootc.json` exists + assert ("/sysroot/composefs/bootc.json" | path exists) "/sysroot/composefs/bootc.json must exist" + + # 4. Calls `bootc internals fsck images` (via `bootc_testlib initial_status_and_checks`) + initial_status_and_checks + + # Verify bootc status --json storage field + let st = bootc status --json | from json + let storage_unified = $st.status.storage?.unified? | default "disabled" + assert ($storage_unified != "disabled") $"status JSON: storage.unified must not be disabled, got ($storage_unified)" + + # Verify fsck --json output + let fsck_json = bootc internals fsck images --json | from json + assert ($fsck_json.unifiedStorageEnabled) "fsck JSON: unifiedStorageEnabled must be true" + assert ($fsck_json.ok) "fsck JSON: ok must be true" + # reflinksSupported may be true or false depending on filesystem + assert ("reflinksSupported" in $fsck_json) "fsck JSON: reflinksSupported field must be present" + # Must have checked at least one image — guards against the "0 image(s) OK" + # silent-pass bug where the live-deployment filter skips everything. + assert (($fsck_json.images | length) > 0) "fsck JSON: at least one image must be checked (not silently skipped)" + + # 5. On composefs-native boot (`tap is_composefs`): asserts `/sysroot/composefs/streams` exists and has at least one OCI ref symlink + if (tap is_composefs) { + assert ("/sysroot/composefs/streams" | path exists) "/sysroot/composefs/streams must exist" + let oci_refs = (ls /sysroot/composefs/streams/refs/oci/ | where type == "symlink") + assert (($oci_refs | length) > 0) "At least one OCI ref must exist under /sysroot/composefs/streams/refs/oci/" + } +} + # This is a workaround for what must be a systemd bug # that seems to have appeared in C10S # TODO diagnose and fill in here diff --git a/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu b/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu index 817bc6003..37bc27e74 100644 --- a/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu +++ b/tmt/tests/booted/readonly/011-test-ostree-ext-cli.nu @@ -9,8 +9,14 @@ tap begin "verify bootc wrapping ostree-ext" let st = bootc status --json | from json # Detect composefs by checking if composefs field is present let is_composefs = (tap is_composefs) +# On unified storage installs the ostree commit is synthesized from composefs +# via FICLONE reflinks rather than imported via the ostree-container protocol, +# so the ostree-container image metadata is not available. +let is_unified = ($st.status.storage?.unified? | default "disabled") != "disabled" if $is_composefs { print "# TODO composefs: skipping test - ostree-container commands don't work with composefs" +} else if $is_unified { + print "# unified storage: skipping test - ostree commit synthesized from composefs, no ostree-container metadata" } else { let booted = $st.status.booted.image # Then verify we can extract its metadata via the ostree-container code. diff --git a/tmt/tests/booted/readonly/013-test-aleph.nu b/tmt/tests/booted/readonly/013-test-aleph.nu index ea8d7d706..4856f2a83 100644 --- a/tmt/tests/booted/readonly/013-test-aleph.nu +++ b/tmt/tests/booted/readonly/013-test-aleph.nu @@ -21,6 +21,18 @@ if $is_upgrade { exit 0 } +# After a `bootc switch --transport containers-storage`, the booted transport +# is containers-storage. The aleph file records the original install-time +# registry image, so the cross-check against the current booted image would +# legitimately fail. Skip in this case. +let st = bootc status --json | from json +let booted_transport = $st.status.booted.image.image.transport +if $booted_transport == "containers-storage" { + print "# Skipping aleph cross-check: booted from containers-storage (aleph records original install)" + tap ok + exit 0 +} + # Detect composefs by checking if composefs field is present let is_composefs = (tap is_composefs) if $is_composefs { @@ -35,7 +47,7 @@ if $is_composefs { assert ($aleph.selinux | is-not-empty) "selinux field should be non-empty" # Cross-check aleph fields against the booted image from bootc status - let st = bootc status --json | from json + # (st was already parsed above for the transport check) let booted = $st.status.booted # Verify the digest field matches the booted image digest diff --git a/tmt/tests/booted/readonly/015-test-fsck.nu b/tmt/tests/booted/readonly/015-test-fsck.nu index 8700a5f66..6ddc8afd5 100644 --- a/tmt/tests/booted/readonly/015-test-fsck.nu +++ b/tmt/tests/booted/readonly/015-test-fsck.nu @@ -3,15 +3,17 @@ use tap.nu tap begin "Run fsck" -# Detect composefs by checking if composefs field is present -let st = bootc status --json | from json let is_composefs = (tap is_composefs) if $is_composefs { - print "# TODO composefs: skipping test - fsck requires ostree-booted host" + print "# TODO composefs: skipping low-level fsck - requires ostree-booted host" } else { - # That's it, just ensure we've run a fsck on our basic install. + # Run the full fsck suite (low-level checks + image metadata). bootc internals fsck } +# Image metadata check runs on all backends — it's a no-op when unified +# storage is not enabled, so this is always safe to run. +bootc internals fsck images + tap ok diff --git a/tmt/tests/booted/readonly/032-test-composefs-repo.nu b/tmt/tests/booted/readonly/032-test-composefs-repo.nu new file mode 100644 index 000000000..c71172a84 --- /dev/null +++ b/tmt/tests/booted/readonly/032-test-composefs-repo.nu @@ -0,0 +1,36 @@ +use std assert +use tap.nu + +tap begin "composefs repo always present" + +let st = bootc status --json | from json +let is_composefs = (tap is_composefs) + +# The composefs repository must always exist on a booted bootc system, +# regardless of whether the system was booted via composefs or classic ostree. +# This is a regression guard for the composefs-first fetch pipeline. + +print "# Checking /sysroot/composefs directory structure" + +assert ("/sysroot/composefs" | path exists) "/sysroot/composefs must exist" +assert (("/sysroot/composefs" | path type) == "dir") "/sysroot/composefs must be a directory" + +assert ("/sysroot/composefs/objects" | path exists) "/sysroot/composefs/objects must exist" +assert (("/sysroot/composefs/objects" | path type) == "dir") "/sysroot/composefs/objects must be a directory" + +print $"# /sysroot/composefs exists with objects directory \(is_composefs: ($is_composefs)\)" + +if $is_composefs { + # On a composefs-native boot, the streams/refs tree must also be populated. + print "# Checking composefs streams/refs structure (composefs-native boot)" + + assert ("/sysroot/composefs/streams" | path exists) "/sysroot/composefs/streams must exist on composefs boot" + assert (("/sysroot/composefs/streams" | path type) == "dir") "/sysroot/composefs/streams must be a directory" + + let oci_refs = (ls /sysroot/composefs/streams/refs/oci/ | where type == "symlink") + let n_refs = ($oci_refs | length) + print $"# Found ($n_refs) OCI image ref symlinks under /sysroot/composefs/streams/refs/oci/" + assert ($n_refs > 0) "At least one OCI image ref symlink must exist under /sysroot/composefs/streams/refs/oci/ on a composefs-native booted system" +} + +tap ok diff --git a/tmt/tests/booted/readonly/040-test-baseconfigs.nu b/tmt/tests/booted/readonly/040-test-baseconfigs.nu index a8f6e510d..a6c659984 100644 --- a/tmt/tests/booted/readonly/040-test-baseconfigs.nu +++ b/tmt/tests/booted/readonly/040-test-baseconfigs.nu @@ -74,6 +74,30 @@ for config in $configs { assert equal $driver "overlay" $"podman should use overlay driver on tmpfs /var, got: ($driver)" print $"# podman graph driver: ($driver) ✓" }, + "unified-storage" => { + print "# Checking unified-storage: image must be in unified mode" + # The install config (baked in at image build time) should have caused + # bootc install to enable unified storage, so the booted image must be + # reported as type "unified" from day 1 (no set-unified needed). + let images = (bootc image list --format json | from json) + let unified = ($images | where image_type == "unified") + assert (($unified | length) > 0) "Expected booted image to be unified (installed via unified-storage baseconfig)" + print $"# unified images: ($unified | length) ✓" + + # The bootc-owned storage symlink must be present + assert ("/usr/lib/bootc/storage" | path exists) "/usr/lib/bootc/storage must exist" + print "# /usr/lib/bootc/storage exists ✓" + + # The install config TOML that enabled this must be present in the image + assert ("/usr/lib/bootc/install/00-storage.toml" | path exists) \ + "/usr/lib/bootc/install/00-storage.toml must be present in unified-storage image" + print "# /usr/lib/bootc/install/00-storage.toml exists ✓" + + # fsck must pass + print "# Running bootc internals fsck images" + bootc internals fsck images + print "# fsck passed ✓" + }, _ => { print $"# Unknown baseconfig token: ($config) — skipping" } diff --git a/tmt/tests/booted/test-44-readonly-unified.nu b/tmt/tests/booted/test-44-readonly-unified.nu new file mode 100644 index 000000000..47d8d0318 --- /dev/null +++ b/tmt/tests/booted/test-44-readonly-unified.nu @@ -0,0 +1,71 @@ +# number: 44 +# tmt: +# summary: Readonly tests after onboarding to unified storage via set-unified +# duration: 30m +# +# Two-boot test: +# Boot 0: copy-to-storage + switch to containers-storage transport + reboot. +# This sidesteps any registry dependency so set-unified can work +# fully offline without STORAGE_OPTS/skopeo complexity. +# Boot 1: set-unified (takes the containers-storage fast path), verify, +# then run the full readonly suite. + +use std assert +use tap.nu +use bootc_testlib.nu + +def main [] { + bootc_testlib initial_status_and_checks + match $env.TMT_REBOOT_COUNT? { + null | "0" => first_boot, + "1" => second_boot, + $o => { error make { msg: $"Unexpected TMT_REBOOT_COUNT ($o)" } }, + } +} + +def first_boot [] { + tap begin "copy booted image to containers-storage and switch transport" + + # Copy the booted image into the default /var/lib/containers so that + # set-unified on the next boot can copy from there without a registry pull. + bootc image copy-to-storage + + let st = bootc status --json | from json + let image = $st.status.booted.image.image.image + + # Switch to containers-storage transport so the next boot's set-unified + # sees transport=containers-storage and takes the offline copy path. + bootc switch --transport containers-storage $image + + tmt-reboot +} + +def second_boot [] { + tap begin "readonly tests after onboarding to unified storage" + + # Detect the sysroot filesystem type. On ext4 (no reflink support) we + # must pass --enabled-with-copy; on xfs/btrfs reflinks are available and we + # default to requiring them (the stricter path). + let sysroot_fstype = (findmnt -n -o FSTYPE /sysroot | str trim) + let set_unified_args = if $sysroot_fstype == "ext4" { + print $"# sysroot is ext4 — using --enabled-with-copy for set-unified" + ["--enabled-with-copy"] + } else { + print $"# sysroot is ($sysroot_fstype) — reflinks required" + [] + } + + # Now booted from containers-storage transport — set-unified will copy + # from /var/lib/containers into bootc storage without any network access. + bootc image set-unified ...$set_unified_args + + bootc_testlib verify_unified_storage + + let tests = (ls booted/readonly/*-test-*.nu | get name | sort) + for test_file in $tests { + print $"Running ($test_file)..." + nu $test_file + } + + tap ok +} diff --git a/tmt/tests/booted/test-45-install-unified-flag.nu b/tmt/tests/booted/test-45-install-unified-flag.nu new file mode 100644 index 000000000..05305d5e9 --- /dev/null +++ b/tmt/tests/booted/test-45-install-unified-flag.nu @@ -0,0 +1,29 @@ +# number: 45 +# extra: +# bootc_install_args: --experimental-unified-storage +# fixme_skip_if_composefs: true +# tmt: +# summary: Readonly tests after install with --experimental-unified-storage +# duration: 30m + +use std assert +use tap.nu +use bootc_testlib.nu + +tap begin "readonly tests after install with --experimental-unified-storage" + +let images = bootc image list --format json | from json +let already_unified = ($images | where image_type == "unified" | length) > 0 + +assert $already_unified "System must be in unified storage mode (installed with --experimental-unified-storage)" + +bootc_testlib verify_unified_storage + +# Run the readonly suite +let tests = (ls booted/readonly/*-test-*.nu | get name | sort) +for test_file in $tests { + print $"Running ($test_file)..." + nu $test_file +} + +tap ok diff --git a/tmt/tests/booted/test-composefs-gc-uki.nu b/tmt/tests/booted/test-composefs-gc-uki.nu index e2efd561c..6a87aab28 100644 --- a/tmt/tests/booted/test-composefs-gc-uki.nu +++ b/tmt/tests/booted/test-composefs-gc-uki.nu @@ -5,6 +5,7 @@ use std assert use tap.nu +use bootc_testlib.nu if not (tap is_composefs) { exit 0 @@ -130,6 +131,7 @@ def fourth_boot [] { } def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, diff --git a/tmt/tests/booted/test-composefs-gc.nu b/tmt/tests/booted/test-composefs-gc.nu index d19a01efe..c84118410 100644 --- a/tmt/tests/booted/test-composefs-gc.nu +++ b/tmt/tests/booted/test-composefs-gc.nu @@ -5,6 +5,7 @@ use std assert use tap.nu +use bootc_testlib.nu if not (tap is_composefs) { exit 0 @@ -171,6 +172,7 @@ def fifth_boot [i: int] { } def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, diff --git a/tmt/tests/booted/test-image-pushpull-upgrade.nu b/tmt/tests/booted/test-image-pushpull-upgrade.nu index 708b868ec..605ea7395 100644 --- a/tmt/tests/booted/test-image-pushpull-upgrade.nu +++ b/tmt/tests/booted/test-image-pushpull-upgrade.nu @@ -11,6 +11,7 @@ # Then another build, and reboot into verifying that use std assert use tap.nu +use bootc_testlib.nu const kargsv0 = ["testarg=foo", "othertestkarg", "thirdkarg=bar"] const kargsv1 = ["testarg=foo", "thirdkarg=baz"] @@ -141,6 +142,7 @@ def second_boot [] { # booted from the local container storage and image assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-derived + # We wrote this file let t = open /usr/share/blah.txt | str trim assert equal $t "test content" @@ -191,6 +193,7 @@ def third_boot [] { print "verifying third boot" assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-derived + let t = open /usr/share/blah.txt | str trim assert equal $t "test content2" @@ -218,6 +221,7 @@ def third_boot [] { } def main [] { + bootc_testlib initial_status_and_checks # See https://tmt.readthedocs.io/en/stable/stories/features.html#reboot-during-test match $env.TMT_REBOOT_COUNT? { null | "0" => initial_build, diff --git a/tmt/tests/booted/test-image-upgrade-reboot.nu b/tmt/tests/booted/test-image-upgrade-reboot.nu index f28cff1da..e95236056 100644 --- a/tmt/tests/booted/test-image-upgrade-reboot.nu +++ b/tmt/tests/booted/test-image-upgrade-reboot.nu @@ -17,6 +17,7 @@ # use std assert use tap.nu +use bootc_testlib.nu # This code runs on *each* boot. # Here we just capture information. @@ -118,6 +119,7 @@ def second_boot [] { } def main [] { + bootc_testlib initial_status_and_checks # See https://tmt.readthedocs.io/en/stable/stories/features.html#reboot-during-test match $env.TMT_REBOOT_COUNT? { null | "0" => initial_build, diff --git a/tmt/tests/booted/test-install-unified-flag.nu b/tmt/tests/booted/test-install-unified-flag.nu deleted file mode 100644 index e00ecb01a..000000000 --- a/tmt/tests/booted/test-install-unified-flag.nu +++ /dev/null @@ -1,35 +0,0 @@ -# number: 30 -# extra: -# fixme_skip_if_composefs: true -# tmt: -# summary: Test bootc install with experimental unified storage flag -# duration: 30m -# -# Test bootc install with --experimental-unified-storage flag -# This test performs an actual install to a loopback device and verifies -# the unified storage path is used. - -use std assert -use tap.nu - -# Use an OS-matched target image to avoid version mismatches -# (e.g., XFS features created by newer mkfs.xfs not recognized by older grub2) -let target_image = (tap get_target_image) - -def main [] { - tap begin "install with experimental unified storage flag" - - # Setup filesystem - create a loopback disk image - mkdir /var/mnt - truncate -s 10G disk.img - - # Disable SELinux enforcement for the install (same as test-install-outside-container) - setenforce 0 - - tap run_install $"bootc install to-disk --disable-selinux --via-loopback --filesystem xfs --experimental-unified-storage --source-imgref ($target_image) ./disk.img" - - # Cleanup - rm -f disk.img - - tap ok -} diff --git a/tmt/tests/booted/test-logically-bound-switch.nu b/tmt/tests/booted/test-logically-bound-switch.nu index 298d7ff86..2ea50b6fe 100644 --- a/tmt/tests/booted/test-logically-bound-switch.nu +++ b/tmt/tests/booted/test-logically-bound-switch.nu @@ -17,6 +17,7 @@ use std assert use tap.nu +use bootc_testlib.nu # This code runs on *each* boot. bootc status @@ -110,6 +111,7 @@ def first_boot [] { def second_boot [] { print "verifying second boot after switch" + assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-bound @@ -136,6 +138,7 @@ def second_boot [] { def third_boot [] { print "verifying third boot after upgrade" + assert equal $booted.image.transport containers-storage assert equal $booted.image.image localhost/bootc-bound @@ -154,6 +157,7 @@ def third_boot [] { } def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, diff --git a/tmt/tests/booted/test-rollback.nu b/tmt/tests/booted/test-rollback.nu index 0afe377ac..debc87d40 100644 --- a/tmt/tests/booted/test-rollback.nu +++ b/tmt/tests/booted/test-rollback.nu @@ -107,6 +107,7 @@ def fourth_boot_verify [] { } def main [] { + bootc_testlib initial_status_and_checks # See https://tmt.readthedocs.io/en/stable/stories/features.html#reboot-during-test match $env.TMT_REBOOT_COUNT? { null | "0" => initial_switch, diff --git a/tmt/tests/booted/test-switch-to-unified.nu b/tmt/tests/booted/test-switch-to-unified.nu index 4e8b61442..26e6f04cc 100644 --- a/tmt/tests/booted/test-switch-to-unified.nu +++ b/tmt/tests/booted/test-switch-to-unified.nu @@ -7,6 +7,7 @@ # use std assert use tap.nu +use bootc_testlib.nu # Multi-boot test: boot 0 onboards to unified storage and builds a derived image; # boot 1 verifies we booted into the derived image using containers-storage @@ -17,6 +18,7 @@ let st = bootc status --json | from json let booted = $st.status.booted.image def main [] { + bootc_testlib initial_status_and_checks match $env.TMT_REBOOT_COUNT? { null | "0" => first_boot, "1" => second_boot, @@ -38,7 +40,6 @@ def first_boot [] { } def second_boot [] { - # Onboard to unified storage - this pulls the booted image into bootc storage bootc image set-unified @@ -50,6 +51,15 @@ def second_boot [] { let unified = $images | where image_type == "unified" assert (($unified | length) > 0) "Expected at least one image with type 'unified' after set-unified" + # Verify the unified storage marker was written + assert ("/sysroot/composefs/bootc.json" | path exists) \ + "composefs/bootc.json must exist after set-unified" + print "# composefs/bootc.json exists ✓" + + # Run fsck to verify internal consistency + print "# Verifying unified storage consistency" + bootc_testlib initial_status_and_checks + podman --storage-opt=additionalimagestore=/usr/lib/bootc/storage images let td = mktemp -d @@ -85,6 +95,20 @@ def third_boot [] { let marker = open /usr/share/unified-storage-test.txt | str trim assert equal $marker "unified-storage-test-marker" + # Verify unified storage is still active after reboot into derived image + assert ("/sysroot/composefs/bootc.json" | path exists) \ + "composefs/bootc.json must persist after switching to derived image" + + # The derived image should also be unified + let images = bootc image list --format json | from json + let unified = ($images | where image_type == "unified") + assert (($unified | length) > 0) "Derived image must be unified after switch" + print $"# unified images in derived boot: ($unified | length) ✓" + + # fsck must still pass on the derived image + print "# Verifying unified storage consistency on derived image" + bootc_testlib initial_status_and_checks + # Verify that bootc storage is accessible print "Listing images in bootc storage:" bootc image cmd list diff --git a/tmt/tests/test-30-install-unified-flag.fmf b/tmt/tests/test-30-install-unified-flag.fmf deleted file mode 100644 index ff9faa85b..000000000 --- a/tmt/tests/test-30-install-unified-flag.fmf +++ /dev/null @@ -1,3 +0,0 @@ -summary: Test bootc install with experimental unified storage flag -test: nu booted/test-install-unified-flag.nu -duration: 30m diff --git a/tmt/tests/test-45-install-unified-flag.fmf b/tmt/tests/test-45-install-unified-flag.fmf new file mode 100644 index 000000000..e943ef00b --- /dev/null +++ b/tmt/tests/test-45-install-unified-flag.fmf @@ -0,0 +1,3 @@ +summary: Readonly tests after install with --experimental-unified-storage +test: nu booted/test-45-install-unified-flag.nu +duration: 30m diff --git a/tmt/tests/tests.fmf b/tmt/tests/tests.fmf index 9c6b41713..65328c889 100644 --- a/tmt/tests/tests.fmf +++ b/tmt/tests/tests.fmf @@ -72,11 +72,6 @@ check: duration: 30m test: nu booted/test-soft-reboot-selinux-policy.nu -/test-30-install-unified-flag: - summary: Test bootc install with experimental unified storage flag - duration: 30m - test: nu booted/test-install-unified-flag.nu - /test-31-switch-mutate-in-place: summary: switch --mutate-in-place duration: 30m @@ -169,7 +164,17 @@ check: duration: 10m test: nu booted/test-switch-same-digest.nu +/test-44-readonly-unified: + summary: Readonly tests after onboarding to unified storage via set-unified + duration: 30m + test: nu booted/test-44-readonly-unified.nu + /test-44-shadow-fixup: summary: Test bootc-sysusers-shadow-sync removes orphaned gshadow entries before sysusers duration: 30m test: nu booted/test-44-shadow-fixup.nu + +/test-45-install-unified-flag: + summary: Readonly tests after install with --experimental-unified-storage + duration: 30m + test: nu booted/test-45-install-unified-flag.nu