From 976a537fedcb9fe7f8a3d7baac5fc40213af9f8c Mon Sep 17 00:00:00 2001 From: Leechael Yim Date: Wed, 20 May 2026 02:47:23 +0800 Subject: [PATCH] fix(verifier,vmm): resolve OVMF variant from metadata.json before parsing image name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #678 added the OvmfVariant dispatch so verifiers can pick between the pre-edk2-stable202505 (13-event) and stable202505 (17-event) RTMR[0] layouts, but the resolution chain had a gap: when `vm_config.ovmf_variant` is `None` (any deployment persisted by VMM <= 0.5.11-pre), the verifier jumped straight to parsing `vm_config.image` with `extract_version_from_image_name`, which returns `None` for the real meta-dstack release naming convention `dstack-X.Y.Z-HASH` (the `rsplit('-')` tail is the git-hash segment, not the version). The default fallback `OvmfVariant::Pre202505` then mismatches the actual stable202505 firmware shipped by 0.5.10+ images, producing the RTMR0 mismatch seen during KMS onboarding against a prod CVM running a dstack-0.5.10-HASH image. Close the gap by reading the version straight from metadata.json, which the verifier already loads inside `ensure_image_downloaded` and the VMM already loads inside `Image::load`: - vmm: in `make_vm_config`, derive `ovmf_variant` from `image.info.version` when metadata.json does not declare it explicitly. The resulting vm_config is the explicit source of truth for new deployments. - verifier: thread an `image_ovmf_variant` (resolved from metadata.json's explicit field or its `version` string) through `ImagePaths` and into `compute_measurement_details`, inserting it as a middle priority between `vm_config.ovmf_variant` and the image-name fallback. The image-name fallback remains in place for legacy metadata.json files that predate the `version` field. - Bump MEASUREMENT_CACHE_VERSION to 3 so entries written with the old resolution order (which may have cached the wrong variant) get ignored. 
The image-name parser (`extract_version_from_image_name`) is left as-is and keeps its `dstack-X.Y.Z[.SUFFIX]` shape requirement — it's no longer on the hot path for any image whose metadata.json carries `version`. --- Cargo.lock | 1 + verifier/src/verification.rs | 34 +++++++++++++++++++++++++++++++--- vmm/Cargo.toml | 1 + vmm/src/app.rs | 11 ++++++++++- 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cc71da65..a1d626dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2668,6 +2668,7 @@ dependencies = [ "clap", "dirs", "dstack-kms-rpc", + "dstack-mr", "dstack-port-forward", "dstack-types", "dstack-vmm-rpc", diff --git a/verifier/src/verification.rs b/verifier/src/verification.rs index 0cc91bda..0994974e 100644 --- a/verifier/src/verification.rs +++ b/verifier/src/verification.rs @@ -121,7 +121,10 @@ fn collect_rtmr_mismatch( // Bump whenever expected RTMR computation changes so stale entries get ignored. // v2: edk2-stable202505 OVMF RTMR[0] layout (added 4 events, reshaped BootOrder // and Boot0000); the legacy 13-event log no longer matches any in-field image. -const MEASUREMENT_CACHE_VERSION: u32 = 2; +// v3: resolve OVMF variant from the image's metadata.json (explicit field, or +// `version`) when vm_config doesn't declare one; previous versions silently +// fell back to image-name parsing which fails for `dstack-X.Y.Z-` dirs. +const MEASUREMENT_CACHE_VERSION: u32 = 3; #[derive(Clone, Serialize, Deserialize)] struct CachedMeasurement { @@ -134,6 +137,11 @@ struct ImagePaths { kernel_path: PathBuf, initrd_path: PathBuf, kernel_cmdline: String, + /// OVMF variant resolved from the image's metadata.json: the explicit + /// `ovmf_variant` field when present, otherwise derived from `version`. + /// `None` only when metadata.json declares neither — callers should then + /// fall back to image-name heuristics. 
+ image_ovmf_variant: Option, } pub struct CvmVerifier { @@ -248,15 +256,25 @@ impl CvmVerifier { kernel_path: &Path, initrd_path: &Path, kernel_cmdline: &str, + image_ovmf_variant: Option, ) -> Result { let firmware = fw_path.display().to_string(); let kernel = kernel_path.display().to_string(); let initrd = initrd_path.display().to_string(); - // Prefer the explicit variant the image declared; fall back to parsing - // the version out of the image name for pre-`ovmf_variant` deployments. + // Resolve OVMF variant in priority order: + // 1. `vm_config.ovmf_variant` — explicit declaration from the VMM + // (only emitted by VMM 0.5.11+; deployments persisted by older + // VMMs are `None` and need a fallback). + // 2. `image_ovmf_variant` — resolved from the downloaded image's + // metadata.json (explicit field, or derived from `version`). + // This catches the common case of an old VMM serving a new image. + // 3. `ovmf_variant_for_image` on `vm_config.image` — last-resort + // parsing of the image directory name, kept only for legacy + // images whose metadata.json predates the `version` field. 
let ovmf_variant = vm_config .ovmf_variant + .or(image_ovmf_variant) .unwrap_or_else(|| dstack_mr::ovmf_variant_for_image(vm_config.image.as_deref())); let details = dstack_mr::Machine::builder() @@ -295,6 +313,7 @@ impl CvmVerifier { kernel_path: &Path, initrd_path: &Path, kernel_cmdline: &str, + image_ovmf_variant: Option, ) -> Result { self.compute_measurement_details( vm_config, @@ -302,6 +321,7 @@ impl CvmVerifier { kernel_path, initrd_path, kernel_cmdline, + image_ovmf_variant, ) .map(|details| details.measurements) } @@ -313,6 +333,7 @@ impl CvmVerifier { kernel_path: &Path, initrd_path: &Path, kernel_cmdline: &str, + image_ovmf_variant: Option, ) -> Result { let cache_key = Self::vm_config_cache_key(vm_config)?; @@ -326,6 +347,7 @@ impl CvmVerifier { kernel_path, initrd_path, kernel_cmdline, + image_ovmf_variant, )?; if let Err(e) = self.store_measurements_in_cache(&cache_key, &measurements) { @@ -367,6 +389,9 @@ impl CvmVerifier { let fw_path = image_dir.join(&image_info.bios); let kernel_path = image_dir.join(&image_info.kernel); let initrd_path = image_dir.join(&image_info.initrd); + let image_ovmf_variant = image_info + .ovmf_variant + .or_else(|| dstack_mr::ovmf_variant_for_version(&image_info.version).ok()); let kernel_cmdline = image_info.cmdline + " initrd=initrd"; Ok(ImagePaths { @@ -374,6 +399,7 @@ impl CvmVerifier { kernel_path, initrd_path, kernel_cmdline, + image_ovmf_variant, }) } @@ -394,6 +420,7 @@ impl CvmVerifier { &image_paths.kernel_path, &image_paths.initrd_path, &image_paths.kernel_cmdline, + image_paths.image_ovmf_variant, ) } @@ -556,6 +583,7 @@ impl CvmVerifier { &image_paths.kernel_path, &image_paths.initrd_path, &image_paths.kernel_cmdline, + image_paths.image_ovmf_variant, ) .context("Failed to compute expected measurements")?; diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 0376b30e..d34c453a 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -47,6 +47,7 @@ load_config.workspace = true key-provider-client.workspace = true 
dstack-port-forward.workspace = true dstack-types.workspace = true +dstack-mr.workspace = true hex_fmt.workspace = true lspci.workspace = true base64.workspace = true diff --git a/vmm/src/app.rs b/vmm/src/app.rs index 8b9714cb..d0fdab1c 100644 --- a/vmm/src/app.rs +++ b/vmm/src/app.rs @@ -1188,7 +1188,16 @@ fn make_vm_config(cfg: &Config, manifest: &Manifest, image: &Image) -> Result` + // naming convention. + ovmf_variant: image + .info + .ovmf_variant + .or_else(|| dstack_mr::ovmf_variant_for_version(&image.info.version).ok()), })?; // For backward compatibility config["spec_version"] = serde_json::Value::from(1);