From 51fe9cc98c6b193b7a53a22c78cf55268fddee78 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 15 May 2025 15:56:34 -0400 Subject: [PATCH 01/22] initial inventory integration - no database fixes yet --- Cargo.lock | 1 + dev-tools/omdb/Cargo.toml | 1 + dev-tools/omdb/src/bin/omdb/db.rs | 104 ++++++++++++++--- live-tests/tests/test_nexus_add_remove.rs | 18 ++- nexus-sled-agent-shared/src/inventory.rs | 17 ++- nexus/db-model/src/inventory.rs | 11 +- .../db-queries/src/db/datastore/deployment.rs | 2 +- .../db-queries/src/db/datastore/inventory.rs | 41 +++---- .../src/db/datastore/physical_disk.rs | 13 +-- nexus/db-schema/src/schema.rs | 1 - nexus/inventory/src/builder.rs | 12 +- nexus/inventory/src/collector.rs | 105 +++++++++++++++--- nexus/inventory/src/examples.rs | 50 ++++++--- nexus/reconfigurator/execution/src/dns.rs | 7 +- .../planning/src/blueprint_builder/builder.rs | 11 +- nexus/reconfigurator/planning/src/example.rs | 27 ++--- nexus/reconfigurator/planning/src/planner.rs | 99 +++++++++++------ nexus/reconfigurator/planning/src/system.rs | 41 ++++--- .../background/tasks/sync_service_zone_nat.rs | 22 +++- nexus/types/src/inventory.rs | 28 +++-- schema/crdb/dbinit.sql | 3 - sled-agent/src/rack_setup/plan/service.rs | 11 +- sled-agent/src/rack_setup/service.rs | 12 +- sled-agent/src/sim/sled_agent.rs | 26 ++++- sled-agent/src/sim/storage.rs | 2 +- sled-agent/src/sled_agent.rs | 47 ++++++-- 26 files changed, 493 insertions(+), 219 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e560f69f8d..88fc97158be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7511,6 +7511,7 @@ dependencies = [ "nexus-inventory", "nexus-reconfigurator-preparation", "nexus-saga-recovery", + "nexus-sled-agent-shared", "nexus-test-utils", "nexus-test-utils-macros", "nexus-types", diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 175fbe972b1..058ba93273a 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -43,6 +43,7 @@ nexus-db-schema.workspace = true nexus-inventory.workspace = true nexus-reconfigurator-preparation.workspace = true nexus-saga-recovery.workspace = true +nexus-sled-agent-shared.workspace = true nexus-types.workspace = true omicron-common.workspace = true omicron-uuid-kinds.workspace = true diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 28a194aed62..652e2965357 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -122,6 +122,9 @@ use nexus_db_queries::db::pagination::Paginator; use nexus_db_queries::db::pagination::paginated; use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_queries::db::queries::region_allocation; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneType; @@ -7330,27 +7333,100 @@ fn inv_collection_print_sleds(collection: &Collection) { println!(" reservation: {reservation:?}, quota: {quota:?}"); } - println!( - " zones generation: {} (count: {})", - sled.omicron_zones.generation, - sled.omicron_zones.zones.len(), - ); + if let Some(config) = &sled.ledgered_sled_config { + inv_collection_print_sled_config("LEDGERED", config); + } else { + println!(" no ledgered sled config"); + } - if sled.omicron_zones.zones.is_empty() { - continue; + if let Some(last_reconciliation) = &sled.last_reconciliation { + if Some(&last_reconciliation.last_reconciled_config) + == sled.ledgered_sled_config.as_ref() + { + println!(" last reconciled config: matches ledgered config"); + } else { + inv_collection_print_sled_config( + "LAST RECONCILED CONFIG", + &last_reconciliation.last_reconciled_config, + ); + let disk_errs = collect_config_reconciler_errors( + &last_reconciliation.external_disks, + ); + let dataset_errs = collect_config_reconciler_errors( + &last_reconciliation.datasets, + ); + let zone_errs = collect_config_reconciler_errors( + &last_reconciliation.zones, + ); + for (label, errs) in [ + ("disk", disk_errs), + ("dataset", dataset_errs), + ("zone", zone_errs), + ] { + if errs.is_empty() { + println!(" all {label}s reconciled successfully"); + } else { + println!( + " {} {label} reconciliation errors:", + errs.len() + ); + for err in errs { + println!(" {err}"); + } + } + } + } } - println!(" ZONES FOUND"); - for z in &sled.omicron_zones.zones { - println!( - " zone {} (type {})", - z.id, - z.zone_type.kind().report_str() - ); + print!(" reconciler task status: "); + match &sled.reconciler_status { + ConfigReconcilerInventoryStatus::NotYetRun => { + println!("not yet run"); + } + ConfigReconcilerInventoryStatus::Running { + config, + started_at, + running_for, + } => { + println!("running for {running_for:?} (since {started_at})"); + if Some(config) == sled.ledgered_sled_config.as_ref() { + println!(" reconciling currently-ledgered config"); + } else { + inv_collection_print_sled_config( + "RECONCILING CONFIG", + config, + ); + } + } + ConfigReconcilerInventoryStatus::Idle { completed_at, ran_for } => { + println!( + "idle (finished at {completed_at} \ + after running for {ran_for:?})" + ); + } } } } +fn collect_config_reconciler_errors( + results: &BTreeMap, +) -> Vec { + results + .iter() + .filter_map(|(id, result)| match result { + ConfigReconcilerInventoryResult::Ok => None, + ConfigReconcilerInventoryResult::Err { message } => { + Some(format!("{id}: {message}")) + } + }) + .collect() +} + +fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { + println!("\n{label} SLED CONFIG"); + todo!("finish this method: {config:?}"); +} + fn inv_collection_print_keeper_membership(collection: &Collection) { println!("\nKEEPER MEMBERSHIP"); for k in &collection.clickhouse_keeper_cluster_membership { diff --git a/live-tests/tests/test_nexus_add_remove.rs b/live-tests/tests/test_nexus_add_remove.rs index 1bf5b0ea7ba..8630d94659b 100644 --- a/live-tests/tests/test_nexus_add_remove.rs +++ b/live-tests/tests/test_nexus_add_remove.rs @@ -186,9 +186,21 @@ async fn test_nexus_add_remove(lc: &LiveTestContext) { let agent = latest_collection.sled_agents.get(&sled_id).expect( "collection information for the sled we added a Nexus to", ); - if agent.omicron_zones.zones.iter().any(|z| z.id == new_zone.id) { - debug!(log, "zone still present in inventory"); - return Err(CondCheckError::<()>::NotYet); + if let Some(config) = &agent.ledgered_sled_config { + if config.zones.iter().any(|z| z.id == new_zone.id) { + debug!(log, "zone still present in ledger"); + return Err(CondCheckError::<()>::NotYet); + } + } + if let Some(config) = agent + .last_reconciliation + .as_ref() + .map(|lr| &lr.last_reconciled_config) + { + if config.zones.iter().any(|z| z.id == new_zone.id) { + debug!(log, "zone still present in inventory"); + return Err(CondCheckError::<()>::NotYet); + } } return Ok(latest_collection); }, diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 4e6653ea808..4b1b525e0e6 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -111,11 +111,12 @@ pub struct Inventory { pub usable_hardware_threads: u32, pub usable_physical_ram: ByteCount, pub reservoir_size: ByteCount, - pub omicron_zones: OmicronZonesConfig, pub disks: Vec, pub zpools: Vec, pub datasets: Vec, - pub omicron_physical_disks_generation: Generation, + pub ledgered_sled_config: Option, + pub reconciler_status: ConfigReconcilerInventoryStatus, + pub last_reconciliation: Option, } /// Describes the last attempt made by the sled-agent-config-reconciler to @@ -196,6 +197,18 @@ pub struct OmicronSledConfig { pub remove_mupdate_override: Option, } +impl Default for OmicronSledConfig { + fn default() -> Self { + Self { + generation: Generation::new(), + disks: IdMap::default(), + datasets: IdMap::default(), + zones: IdMap::default(), + remove_mupdate_override: None, + } + } +} + impl Ledgerable for OmicronSledConfig { fn is_newer_than(&self, other: &Self) -> bool { self.generation > other.generation diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 62abfacdab2..a5aefbf1f60 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -8,7 +8,7 @@ use crate::PhysicalDiskKind; use crate::omicron_zone_config::{self, OmicronZoneNic}; use crate::typed_uuid::DbTypedUuid; use crate::{ - ByteCount, Generation, MacAddr, Name, ServiceKind, SqlU8, SqlU16, SqlU32, + ByteCount, MacAddr, Name, ServiceKind, SqlU8, SqlU16, SqlU32, impl_enum_type, ipv6, }; use anyhow::{Context, Result, anyhow, bail}; @@ -27,8 +27,7 @@ use nexus_db_schema::schema::{ inv_collection, inv_collection_error, inv_dataset, inv_nvme_disk_firmware, inv_omicron_zone, inv_omicron_zone_nic, inv_physical_disk, inv_root_of_trust, inv_root_of_trust_page, inv_service_processor, - inv_sled_agent, inv_sled_omicron_zones, inv_zpool, sw_caboose, - sw_root_of_trust_page, + inv_sled_agent, inv_zpool, sw_caboose, sw_root_of_trust_page, }; use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneDataset, OmicronZoneImageSource, @@ -793,7 +792,6 @@ pub struct InvSledAgent { pub usable_hardware_threads: SqlU32, pub usable_physical_ram: ByteCount, pub reservoir_size: ByteCount, - pub omicron_physical_disks_generation: Generation, } impl InvSledAgent { @@ -837,9 +835,6 @@ impl InvSledAgent { sled_agent.usable_physical_ram, ), reservoir_size: ByteCount::from(sled_agent.reservoir_size), - omicron_physical_disks_generation: Generation::from( - sled_agent.omicron_physical_disks_generation, - ), }) } } @@ -1164,6 +1159,7 @@ impl From for nexus_types::inventory::Dataset { } } +/* /// Information about a sled's Omicron zones, part of /// [`nexus_types::inventory::SledAgent`]. /// @@ -1193,6 +1189,7 @@ impl InvSledOmicronZones { } } } +*/ impl_enum_type!( ZoneTypeEnum: diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 809df30aa16..1df74355b8f 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -2135,7 +2135,7 @@ mod tests { assert_eq!(blueprint1.sleds.len(), collection.sled_agents.len()); assert_eq!( blueprint1.all_omicron_zones(BlueprintZoneDisposition::any).count(), - collection.all_omicron_zones().count() + collection.all_ledgered_omicron_zones().count() ); // All zones should be in service. assert_all_zones_in_service(&blueprint1); diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index a64d2310f8b..fa02c88cbee 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -37,14 +37,11 @@ use nexus_db_model::InvCollection; use nexus_db_model::InvCollectionError; use nexus_db_model::InvDataset; use nexus_db_model::InvNvmeDiskFirmware; -use nexus_db_model::InvOmicronZone; -use nexus_db_model::InvOmicronZoneNic; use nexus_db_model::InvPhysicalDisk; use nexus_db_model::InvRootOfTrust; use nexus_db_model::InvRotPage; use nexus_db_model::InvServiceProcessor; use nexus_db_model::InvSledAgent; -use nexus_db_model::InvSledOmicronZones; use nexus_db_model::InvZpool; use nexus_db_model::RotImageError; use nexus_db_model::SledRole; @@ -61,7 +58,7 @@ use nexus_db_schema::enums::RotImageErrorEnum; use nexus_db_schema::enums::RotPageWhichEnum; use nexus_db_schema::enums::SledRoleEnum; use nexus_db_schema::enums::SpTypeEnum; -use nexus_sled_agent_shared::inventory::OmicronZonesConfig; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use nexus_types::inventory::PhysicalDiskFirmware; @@ -162,6 +159,7 @@ impl DataStore { } } + /* // Pull Omicron zone-related metadata out of all sled agents. // // TODO: InvSledOmicronZones is a vestigial table kept for backwards @@ -187,6 +185,7 @@ impl DataStore { }) }) .collect::, _>>()?; + */ // Pull disks out of all sled agents let disks: Vec<_> = collection @@ -240,6 +239,7 @@ impl DataStore { }) .collect::, Error>>()?; + /* let omicron_zone_nics = collection .sled_agents .values() @@ -252,6 +252,7 @@ impl DataStore { }) }) .collect::, _>>()?; + */ let mut inv_clickhouse_keeper_memberships = Vec::new(); for membership in &collection.clickhouse_keeper_cluster_membership { @@ -897,10 +898,6 @@ impl DataStore { sled_agent.reservoir_size, ) .into_sql::(), - nexus_db_model::Generation( - sled_agent.omicron_physical_disks_generation, - ) - .into_sql::(), )) .filter( baseboard_dsl::part_number @@ -926,7 +923,6 @@ impl DataStore { sa_dsl::usable_hardware_threads, sa_dsl::usable_physical_ram, sa_dsl::reservoir_size, - sa_dsl::omicron_physical_disks_generation, )) .execute_async(&conn) .await?; @@ -946,7 +942,6 @@ impl DataStore { _usable_hardware_threads, _usable_physical_ram, _reservoir_size, - _omicron_physical_disks_generation, ) = sa_dsl::inv_sled_agent::all_columns(); } @@ -962,6 +957,7 @@ impl DataStore { .await?; } + /* // Insert all the Omicron zones that we found. { use nexus_db_schema::schema::inv_sled_omicron_zones::dsl as sled_zones; @@ -987,6 +983,7 @@ impl DataStore { .execute_async(&conn) .await?; } + */ // Insert the clickhouse keeper memberships we've received { @@ -2122,6 +2119,7 @@ impl DataStore { ); } + /* // Now read the Omicron zones. // // In the first pass, we'll load the "inv_sled_omicron_zones" records. @@ -2267,6 +2265,13 @@ impl DataStore { map.zones.push(zone); } + bail_unless!( + omicron_zone_nics.is_empty(), + "found extra Omicron zone NICs: {:?}", + omicron_zone_nics.keys() + ); + */ + // Now load the clickhouse keeper cluster memberships let clickhouse_keeper_cluster_membership = { use nexus_db_schema::schema::inv_clickhouse_keeper_membership::dsl; @@ -2298,12 +2303,6 @@ impl DataStore { memberships }; - bail_unless!( - omicron_zone_nics.is_empty(), - "found extra Omicron zone NICs: {:?}", - omicron_zone_nics.keys() - ); - // Finally, build up the sled-agent map using the sled agent and // omicron zone rows. A for loop is easier to understand than into_iter // + filter_map + return Result + collect. @@ -2321,6 +2320,7 @@ impl DataStore { }) .transpose()?; + /* // Look up the Omicron zones. // // Older versions of Nexus fetched the Omicron zones in a separate @@ -2349,6 +2349,7 @@ impl DataStore { ); continue; }; + */ let sled_agent = nexus_types::inventory::SledAgent { time_collected: s.time_collected, @@ -2365,7 +2366,6 @@ impl DataStore { usable_hardware_threads: u32::from(s.usable_hardware_threads), usable_physical_ram: s.usable_physical_ram.into(), reservoir_size: s.reservoir_size.into(), - omicron_zones, // For disks, zpools, and datasets, the map for a sled ID is // only populated if there is at least one disk/zpool/dataset // for that sled. The `unwrap_or_default` calls cover the case @@ -2382,9 +2382,10 @@ impl DataStore { .get(sled_id.as_untyped_uuid()) .map(|datasets| datasets.to_vec()) .unwrap_or_default(), - omicron_physical_disks_generation: s - .omicron_physical_disks_generation - .into(), + // TODO-john This is all wrong! + ledgered_sled_config: None, + reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, + last_reconciliation: None, }; sled_agents.insert(sled_id, sled_agent); } diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 1d2423ddeac..abfcb113a8b 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -339,7 +339,8 @@ mod test { use dropshot::PaginationOrder; use nexus_db_lookup::LookupPath; use nexus_sled_agent_shared::inventory::{ - Baseboard, Inventory, InventoryDisk, OmicronZonesConfig, SledRole, + Baseboard, ConfigReconcilerInventoryStatus, Inventory, InventoryDisk, + SledRole, }; use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; @@ -692,15 +693,13 @@ mod test { sled_id: SledUuid::from_untyped_uuid(sled.id()), usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), - omicron_zones: OmicronZonesConfig { - generation: OmicronZonesConfig::INITIAL_GENERATION, - zones: vec![], - }, disks, zpools: vec![], datasets: vec![], - omicron_physical_disks_generation: - omicron_common::api::external::Generation::new(), + ledgered_sled_config: None, + reconciler_status: + ConfigReconcilerInventoryStatus::NotYetRun, + last_reconciliation: None, }, ) .unwrap(); diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 89d92585ce0..105df2cc45d 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1582,7 +1582,6 @@ table! { usable_hardware_threads -> Int8, usable_physical_ram -> Int8, reservoir_size -> Int8, - omicron_physical_disks_generation -> Int8, } } diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 960d4aa0879..42b3be0d6cb 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -149,12 +149,6 @@ impl CollectionBuilder { /// Assemble a complete `Collection` representation pub fn build(mut self) -> Collection { - // This is not strictly necessary. But for testing, it's helpful for - // things to be in sorted order. - for v in self.sleds.values_mut() { - v.omicron_zones.zones.sort_by(|a, b| a.id.cmp(&b.id)); - } - Collection { id: self.rng.id_rng.next(), errors: self.errors.into_iter().map(|e| e.to_string()).collect(), @@ -531,7 +525,6 @@ impl CollectionBuilder { reservoir_size: inventory.reservoir_size, time_collected, sled_id, - omicron_zones: inventory.omicron_zones, disks: inventory.disks.into_iter().map(|d| d.into()).collect(), zpools: inventory .zpools @@ -543,8 +536,9 @@ impl CollectionBuilder { .into_iter() .map(|d| d.into()) .collect(), - omicron_physical_disks_generation: inventory - .omicron_physical_disks_generation, + ledgered_sled_config: inventory.ledgered_sled_config, + reconciler_status: inventory.reconciler_status, + last_reconciliation: inventory.last_reconciliation, }; if let Some(previous) = self.sleds.get(&sled_id) { diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 2e2e4c1ef36..d7f418c32cb 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -409,6 +409,7 @@ mod test { use crate::StaticSledAgentEnumerator; use gateway_messages::SpPort; use id_map::IdMap; + use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; @@ -426,6 +427,52 @@ mod test { use std::net::SocketAddrV6; use std::sync::Arc; + fn dump_sled_config(s: &mut String, config: &OmicronSledConfig) { + let OmicronSledConfig { + generation, + disks, + datasets, + zones, + remove_mupdate_override, + } = config; + + writeln!(s, " generation: {generation}").unwrap(); + writeln!( + s, + " remove_mupdate_override: {remove_mupdate_override:?}" + ) + .unwrap(); + for disk in disks { + writeln!( + s, + " disk {}: {} / {} / {}", + disk.id, + disk.identity.vendor, + disk.identity.model, + disk.identity.serial + ) + .unwrap(); + } + for dataset in datasets { + writeln!( + s, + " dataset {}: {}", + dataset.id, + dataset.name.full_name() + ) + .unwrap(); + } + for zone in zones { + writeln!( + s, + " zone {} type {}", + zone.id, + zone.zone_type.kind().report_str(), + ) + .unwrap(); + } + } + fn dump_collection(collection: &Collection) -> String { // Construct a stable, human-readable summary of the Collection // contents. We could use a `Debug` impl for this, but that's not quite @@ -520,21 +567,51 @@ mod test { write!(&mut s, " baseboard {:?}\n", sled_info.baseboard_id) .unwrap(); - write!( - &mut s, - " zone generation: {:?}\n", - sled_info.omicron_zones.generation - ) - .unwrap(); - write!(&mut s, " zones found:\n").unwrap(); - for zone in &sled_info.omicron_zones.zones { - write!( + if let Some(config) = &sled_info.ledgered_sled_config { + writeln!(&mut s, " ledgered sled config:").unwrap(); + dump_sled_config(&mut s, config); + } else { + writeln!(&mut s, " no ledgered sled config").unwrap(); + } + + if let Some(last_reconciliation) = &sled_info.last_reconciliation { + writeln!(&mut s, " last reconciled config:").unwrap(); + dump_sled_config( &mut s, - " zone {} type {}\n", - zone.id, - zone.zone_type.kind().report_str(), - ) - .unwrap(); + &last_reconciliation.last_reconciled_config, + ); + for (id, result) in &last_reconciliation.external_disks { + writeln!(&mut s, " result for disk {id}: {result:?}") + .unwrap(); + } + for (id, result) in &last_reconciliation.datasets { + writeln!(&mut s, " result for dataset {id}: {result:?}") + .unwrap(); + } + for (id, result) in &last_reconciliation.zones { + writeln!(&mut s, " result for zone {id}: {result:?}") + .unwrap(); + } + } else { + writeln!(&mut s, " no completed reconciliation").unwrap(); + } + + match &sled_info.reconciler_status { + ConfigReconcilerInventoryStatus::NotYetRun => { + writeln!(&mut s, " reconciler task not yet run") + .unwrap(); + } + ConfigReconcilerInventoryStatus::Running { config, .. } => { + writeln!( + &mut s, + " reconciler task running with config:" + ) + .unwrap(); + dump_sled_config(&mut s, config); + } + ConfigReconcilerInventoryStatus::Idle { .. } => { + writeln!(&mut s, " reconciler task idle").unwrap(); + } } } diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index f8dea3a4e0d..bac832cc148 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -14,10 +14,12 @@ use gateway_client::types::SpComponentCaboose; use gateway_client::types::SpState; use gateway_client::types::SpType; use nexus_sled_agent_shared::inventory::Baseboard; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; +use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_sled_agent_shared::inventory::SledRole; use nexus_types::inventory::BaseboardId; @@ -25,7 +27,6 @@ use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::RotPage; use nexus_types::inventory::RotPageWhich; use omicron_common::api::external::ByteCount; -use omicron_common::api::external::Generation; use omicron_common::disk::DiskVariant; use omicron_uuid_kinds::SledUuid; use std::sync::Arc; @@ -295,6 +296,31 @@ pub fn representative() -> Representative { let sled16: OmicronZonesConfig = serde_json::from_str(sled16_data).unwrap(); let sled17: OmicronZonesConfig = serde_json::from_str(sled17_data).unwrap(); + // Convert these to `OmicronSledConfig`s. For now we leave the disks and + // datasets blank. This is wrong but not (currently) a problem. If you + // landed here and it is a problem for you now, I apologize. + let sled14 = OmicronSledConfig { + generation: sled14.generation, + disks: Default::default(), + datasets: Default::default(), + zones: sled14.zones.into_iter().collect(), + remove_mupdate_override: None, + }; + let sled16 = OmicronSledConfig { + generation: sled16.generation, + disks: Default::default(), + datasets: Default::default(), + zones: sled16.zones.into_iter().collect(), + remove_mupdate_override: None, + }; + let sled17 = OmicronSledConfig { + generation: sled17.generation, + disks: Default::default(), + datasets: Default::default(), + zones: sled17.zones.into_iter().collect(), + remove_mupdate_override: None, + }; + // Report some sled agents. // // This first one will match "sled1_bb"'s baseboard information. @@ -387,10 +413,10 @@ pub fn representative() -> Representative { revision: 0, }, SledRole::Gimlet, - sled14, disks, zpools, datasets, + Some(sled14), ), ) .unwrap(); @@ -415,10 +441,10 @@ pub fn representative() -> Representative { revision: 0, }, SledRole::Scrimlet, - sled16, vec![], vec![], vec![], + Some(sled16), ), ) .unwrap(); @@ -438,10 +464,10 @@ pub fn representative() -> Representative { model: String::from("fellofftruck"), }, SledRole::Gimlet, - sled17, vec![], vec![], vec![], + Some(sled17), ), ) .unwrap(); @@ -459,15 +485,12 @@ pub fn representative() -> Representative { sled_agent_id_unknown, Baseboard::Unknown, SledRole::Gimlet, - // We only have omicron zones for three sleds so use empty zone - // info here. - OmicronZonesConfig { - generation: Generation::new(), - zones: Vec::new(), - }, vec![], vec![], vec![], + // We only have omicron zones for three sleds so report no sled + // config here. + None, ), ) .unwrap(); @@ -556,10 +579,10 @@ pub fn sled_agent( sled_id: SledUuid, baseboard: Baseboard, sled_role: SledRole, - omicron_zones: OmicronZonesConfig, disks: Vec, zpools: Vec, datasets: Vec, + ledgered_sled_config: Option, ) -> Inventory { Inventory { baseboard, @@ -569,10 +592,11 @@ pub fn sled_agent( sled_id, usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), - omicron_zones, disks, zpools, datasets, - omicron_physical_disks_generation: Generation::new(), + ledgered_sled_config, + reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, + last_reconciliation: None, } } diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 81126677e7e..4f838a9dc9c 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -655,11 +655,12 @@ mod test { let mut blueprint_sleds = BTreeMap::new(); for (sled_id, sa) in collection.sled_agents { + let ledgered_sled_config = sa.ledgered_sled_config.unwrap(); + // Convert the inventory `OmicronZonesConfig`s into // `BlueprintZoneConfig`s. This is going to get more painful over // time as we add to blueprints, but for now we can make this work. - let zones = sa - .omicron_zones + let zones = ledgered_sled_config .zones .into_iter() .map(|config| -> BlueprintZoneConfig { @@ -678,7 +679,7 @@ mod test { sled_id, BlueprintSledConfig { state: SledState::Active, - sled_agent_generation: sa.omicron_zones.generation, + sled_agent_generation: ledgered_sled_config.generation, disks: IdMap::new(), datasets: IdMap::new(), zones, diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 2e889f2d2d4..7822d0cfed8 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -2694,9 +2694,14 @@ pub mod test { let sled_id = { let mut selected_sled_id = None; for (sled_id, sa) in &mut collection.sled_agents { - let nzones_before_retain = sa.omicron_zones.zones.len(); - sa.omicron_zones.zones.retain(|z| !z.zone_type.is_nexus()); - if sa.omicron_zones.zones.len() < nzones_before_retain { + let sa_zones = &mut sa + .ledgered_sled_config + .as_mut() + .expect("example should have a sled config") + .zones; + let nzones_before_retain = sa_zones.len(); + sa_zones.retain(|z| !z.zone_type.is_nexus()); + if sa_zones.len() < nzones_before_retain { selected_sled_id = Some(*sled_id); // Also remove this zone from the blueprint. let mut removed_nexus = None; diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 48314befabd..65f4c0ff334 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -14,7 +14,6 @@ use crate::planner::rng::PlannerRng; use crate::system::SledBuilder; use crate::system::SystemDescription; use nexus_inventory::CollectionBuilderRng; -use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_types::deployment::Blueprint; use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; @@ -486,15 +485,7 @@ impl ExampleSystemBuilder { for (sled_id, sled_cfg) in &blueprint.sleds { let sled_cfg = sled_cfg.clone().into_in_service_sled_config(); - system - .sled_set_omicron_zones( - *sled_id, - OmicronZonesConfig { - generation: sled_cfg.generation, - zones: sled_cfg.zones.into_iter().collect(), - }, - ) - .unwrap(); + system.sled_set_omicron_config(*sled_id, sled_cfg).unwrap(); } // We just ensured that a handful of datasets should exist in @@ -612,8 +603,10 @@ mod tests { nexus_zones.len(), nexus_zones, ); - let nexus_zones = - collection_zones_of_kind(&example.collection, ZoneKind::Nexus); + let nexus_zones = collection_ledgered_zones_of_kind( + &example.collection, + ZoneKind::Nexus, + ); assert_eq!( nexus_zones.len(), 6, @@ -631,7 +624,7 @@ mod tests { internal_dns_zones.len(), internal_dns_zones, ); - let internal_dns_zones = collection_zones_of_kind( + let internal_dns_zones = collection_ledgered_zones_of_kind( &example.collection, ZoneKind::InternalDns, ); @@ -652,7 +645,7 @@ mod tests { external_dns_zones.len(), external_dns_zones, ); - let external_dns_zones = collection_zones_of_kind( + let external_dns_zones = collection_ledgered_zones_of_kind( &example.collection, ZoneKind::ExternalDns, ); @@ -673,7 +666,7 @@ mod tests { crucible_pantry_zones.len(), crucible_pantry_zones, ); - let crucible_pantry_zones = collection_zones_of_kind( + let crucible_pantry_zones = collection_ledgered_zones_of_kind( &example.collection, ZoneKind::CruciblePantry, ); @@ -700,12 +693,12 @@ mod tests { .collect() } - fn collection_zones_of_kind( + fn collection_ledgered_zones_of_kind( collection: &Collection, kind: ZoneKind, ) -> Vec<&OmicronZoneConfig> { collection - .all_omicron_zones() + .all_ledgered_omicron_zones() .filter(|zone| zone.zone_type.kind() == kind) .collect() } diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 38f85e772fc..3d681dda640 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -218,7 +218,13 @@ impl<'a> Planner<'a> { .inventory .sled_agents .get(&sled_id) - .map(|sa| sa.omicron_physical_disks_generation) + // TODO-correctness For now this is correct, because sled-agent + // doesn't ledger a config until it's applied it. However, once + // https://github.com/oxidecomputer/omicron/pull/8064 lands, + // sled-agent will ledger a config and then later reconcile it; do + // we need to wait for that reconciliation to decommission disks? + .and_then(|sa| sa.ledgered_sled_config.as_ref()) + .map(|config| config.generation) else { // There is no current inventory for the sled agent, so we cannot // decommission any disks. @@ -418,14 +424,18 @@ impl<'a> Planner<'a> { as_of_generation, ready_for_cleanup, } if !ready_for_cleanup => { - if sled_inv.omicron_zones.generation >= as_of_generation - && !sled_inv - .omicron_zones - .zones - .iter() - .any(|z| z.id == zone.id) - { - zones_ready_for_cleanup.push(zone.id); + // TODO-correctness For now this is correct, because + // sled-agent doesn't ledger a config until it's applied it. + // However, as a part of landing + // https://github.com/oxidecomputer/omicron/pull/8064, + // this needs to change to check the last reconciled config + // instead of just the ledgered config. + if let Some(config) = &sled_inv.ledgered_sled_config { + if config.generation >= as_of_generation + && !config.zones.contains_key(&zone.id) + { + zones_ready_for_cleanup.push(zone.id); + } } } BlueprintZoneDisposition::InService @@ -558,16 +568,25 @@ impl<'a> Planner<'a> { // the NTP zone or switching between an internal NTP vs. boundary // NTP zone), we'll need to be careful how we do it to avoid a // problem here. + // + // TODO-cleanup Once + // https://github.com/oxidecomputer/omicron/pull/8064 lands, the + // above comment will be overly conservative; sled-agent won't + // reject configs just because time isn't sync'd yet. We may be able + // to remove this check entirely. (It's probably also fine to keep + // it for now; removing it just saves us an extra planning iteration + // when adding a new sled.) let has_ntp_inventory = self .inventory .sled_agents .get(&sled_id) .map(|sled_agent| { - sled_agent - .omicron_zones - .zones - .iter() - .any(|z| z.zone_type.is_ntp()) + sled_agent.ledgered_sled_config.as_ref().map_or( + false, + |config| { + config.zones.iter().any(|z| z.zone_type.is_ntp()) + }, + ) }) .unwrap_or(false); if !has_ntp_inventory { @@ -995,7 +1014,6 @@ pub(crate) mod test { use clickhouse_admin_types::ClickhouseKeeperClusterMembership; use clickhouse_admin_types::KeeperId; use expectorate::assert_contents; - use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_types::deployment::BlueprintDatasetDisposition; use nexus_types::deployment::BlueprintDiffSummary; use nexus_types::deployment::BlueprintPhysicalDiskDisposition; @@ -1180,18 +1198,15 @@ pub(crate) mod test { // example.collection -- this should be addressed via API improvements. example .system - .sled_set_omicron_zones(new_sled_id, { - let sled_cfg = blueprint4 + .sled_set_omicron_config( + new_sled_id, + blueprint4 .sleds .get(&new_sled_id) .expect("blueprint should contain zones for new sled") .clone() - .into_in_service_sled_config(); - OmicronZonesConfig { - generation: sled_cfg.generation, - zones: sled_cfg.zones.into_iter().collect(), - } - }) + .into_in_service_sled_config(), + ) .unwrap(); let collection = example.system.to_collection_builder().unwrap().build(); @@ -2043,7 +2058,10 @@ pub(crate) mod test { .sled_agents .get(&sled_id) .unwrap() - .omicron_physical_disks_generation, + .ledgered_sled_config + .as_ref() + .unwrap() + .generation, Generation::new() ); @@ -2130,7 +2148,10 @@ pub(crate) mod test { .sled_agents .get_mut(&sled_id) .unwrap() - .omicron_physical_disks_generation = Generation::from_u32(3); + .ledgered_sled_config + .as_mut() + .unwrap() + .generation = Generation::from_u32(3); let blueprint3 = Planner::new_based_on( logctx.log.clone(), @@ -4095,7 +4116,9 @@ pub(crate) mod test { .sled_agents .get_mut(&sled_id) .unwrap() - .omicron_zones + .ledgered_sled_config + .as_mut() + .unwrap() .generation = bp2_generation; collection }, @@ -4111,7 +4134,9 @@ pub(crate) mod test { .sled_agents .get_mut(&sled_id) .unwrap() - .omicron_zones + .ledgered_sled_config + .as_mut() + .unwrap() .zones .retain(|z| z.id != nexus_config.id); collection @@ -4121,13 +4146,15 @@ pub(crate) mod test { // Now make both changes to the inventory. { - let zones = &mut collection + let config = &mut collection .sled_agents .get_mut(&sled_id) .unwrap() - .omicron_zones; - zones.generation = bp2_generation; - zones.zones.retain(|z| z.id != nexus_config.id); + .ledgered_sled_config + .as_mut() + .unwrap(); + config.generation = bp2_generation; + config.zones.retain(|z| z.id != nexus_config.id); } // Run the planner. It mark our Nexus zone as ready for cleanup now that @@ -4269,13 +4296,15 @@ pub(crate) mod test { // Make the inventory changes necessary for cleanup to proceed. { - let zones = &mut collection + let config = &mut collection .sled_agents .get_mut(&sled_id) .unwrap() - .omicron_zones; - zones.generation = bp2_generation; - zones.zones.retain(|z| z.id != internal_dns_config.id); + .ledgered_sled_config + .as_mut() + .unwrap(); + config.generation = bp2_generation; + config.zones.retain(|z| z.id != internal_dns_config.id); } // Run the planner. It should mark our internal DNS zone as ready for diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 472bfe3bfa5..12219daaf81 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -13,11 +13,12 @@ use ipnet::Ipv6Net; use ipnet::Ipv6Subnets; use nexus_inventory::CollectionBuilder; use nexus_sled_agent_shared::inventory::Baseboard; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; -use nexus_sled_agent_shared::inventory::OmicronZonesConfig; +use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::SledRole; use nexus_types::deployment::ClickhousePolicy; use nexus_types::deployment::CockroachDbClusterVersion; @@ -307,7 +308,7 @@ impl SystemDescription { sled.unique, sled.hardware, hardware_slot, - sled.omicron_zones, + sled.sled_config, sled.npools, ); self.sleds.insert(sled_id, Arc::new(sled)); @@ -367,7 +368,7 @@ impl SystemDescription { !self.sleds.is_empty() } - /// Set Omicron zones for a sled. + /// Set Omicron config for a sled. /// /// The zones will be reported in the collection generated by /// [`Self::to_collection_builder`]. @@ -376,17 +377,23 @@ impl SystemDescription { /// /// # Notes /// - /// It is okay to call `sled_set_omicron_zones` in ways that wouldn't + /// It is okay to call `sled_set_omicron_config` in ways that wouldn't /// happen in production, such as to test illegal states. - pub fn sled_set_omicron_zones( + pub fn sled_set_omicron_config( &mut self, sled_id: SledUuid, - omicron_zones: OmicronZonesConfig, + sled_config: OmicronSledConfig, ) -> anyhow::Result<&mut Self> { let sled = self.sleds.get_mut(&sled_id).with_context(|| { format!("attempted to access sled {} not found in system", sled_id) })?; - Arc::make_mut(sled).inventory_sled_agent.omicron_zones = omicron_zones; + let sled = Arc::make_mut(sled); + + sled.inventory_sled_agent.ledgered_sled_config = Some(sled_config); + sled.inventory_sled_agent.reconciler_status = + ConfigReconcilerInventoryStatus::NotYetRun; + sled.inventory_sled_agent.last_reconciliation = None; + Ok(self) } @@ -490,7 +497,7 @@ pub struct SledBuilder { hardware: SledHardware, hardware_slot: Option, sled_role: SledRole, - omicron_zones: OmicronZonesConfig, + sled_config: OmicronSledConfig, npools: u8, } @@ -508,11 +515,7 @@ impl SledBuilder { hardware: SledHardware::Gimlet, hardware_slot: None, sled_role: SledRole::Gimlet, - omicron_zones: OmicronZonesConfig { - // The initial generation is the one with no zones. - generation: OmicronZonesConfig::INITIAL_GENERATION, - zones: Vec::new(), - }, + sled_config: OmicronSledConfig::default(), npools: Self::DEFAULT_NPOOLS, } } @@ -601,7 +604,7 @@ impl Sled { unique: Option, hardware: SledHardware, hardware_slot: u16, - omicron_zones: OmicronZonesConfig, + sled_config: OmicronSledConfig, nzpools: u8, ) -> Sled { use typed_rng::TypedUuidRng; @@ -688,7 +691,6 @@ impl Sled { sled_id, usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), - omicron_zones, // Populate disks, appearing like a real device. disks: zpools .values() @@ -714,7 +716,9 @@ impl Sled { }) .collect(), datasets: vec![], - omicron_physical_disks_generation: Generation::new(), + ledgered_sled_config: Some(sled_config), + reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, + last_reconciliation: None, } }; @@ -849,11 +853,12 @@ impl Sled { sled_id, usable_hardware_threads: inv_sled_agent.usable_hardware_threads, usable_physical_ram: inv_sled_agent.usable_physical_ram, - omicron_zones: inv_sled_agent.omicron_zones.clone(), disks: vec![], zpools: vec![], datasets: vec![], - omicron_physical_disks_generation: Generation::new(), + ledgered_sled_config: inv_sled_agent.ledgered_sled_config.clone(), + reconciler_status: inv_sled_agent.reconciler_status.clone(), + last_reconciliation: inv_sled_agent.last_reconciliation.clone(), }; Sled { diff --git a/nexus/src/app/background/tasks/sync_service_zone_nat.rs b/nexus/src/app/background/tasks/sync_service_zone_nat.rs index 02d49e7f590..a1c1869fed9 100644 --- a/nexus/src/app/background/tasks/sync_service_zone_nat.rs +++ b/nexus/src/app/background/tasks/sync_service_zone_nat.rs @@ -17,9 +17,7 @@ use nexus_db_lookup::LookupPath; use nexus_db_model::Ipv4NatValues; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; -use nexus_sled_agent_shared::inventory::{ - OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, -}; +use nexus_sled_agent_shared::inventory::OmicronZoneType; use omicron_common::address::{MAX_PORT, MIN_PORT}; use omicron_uuid_kinds::GenericUuid; use serde_json::json; @@ -127,8 +125,22 @@ impl BackgroundTask for ServiceZoneNatTracker { let sled_address = oxnet::Ipv6Net::host_net(*sled.ip); - let zones_config: OmicronZonesConfig = sa.omicron_zones; - let zones: Vec = zones_config.zones; + // TODO-correctness Looking at inventory here is a little + // sketchy. We currently check the most-recently-ledgered zones + // which tells us what services sled-agent things it's supposed + // to be running. It might be better to check either: + // + // * `sa.last_reconciliation` (to know what zones are actually + // running; this requires + // https://github.com/oxidecomputer/omicron/pull/8064 landing) + // if the goal is to sync what's actually on the sled + // * a rendezvous table populated by reconfigurator if the goal + // is to sync with what's Nexus thinks is supposed to be + // running on the sled + let zones = sa + .ledgered_sled_config + .map(|config| config.zones) + .unwrap_or_default(); for zone in zones { let zone_type: OmicronZoneType = zone.zone_type; diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 2540a5fbb19..2afca8e1970 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -19,14 +19,15 @@ pub use gateway_client::types::PowerState; pub use gateway_client::types::RotImageError; pub use gateway_client::types::RotSlot; pub use gateway_client::types::SpType; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::InventoryDataset; use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; +use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; -use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_sled_agent_shared::inventory::SledRole; use omicron_common::api::external::ByteCount; -use omicron_common::api::external::Generation; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; pub use omicron_common::api::internal::shared::SourceNatConfig; @@ -166,11 +167,15 @@ impl Collection { .and_then(|by_bb| by_bb.get(baseboard_id)) } - /// Iterate over all the Omicron zones in the collection - pub fn all_omicron_zones( + /// Iterate over all the Omicron zones in the sled-agent ledgers of this + /// collection + pub fn all_ledgered_omicron_zones( &self, ) -> impl Iterator { - self.sled_agents.values().flat_map(|sa| sa.omicron_zones.zones.iter()) + self.sled_agents + .values() + .filter_map(|sa| sa.ledgered_sled_config.as_ref()) + .flat_map(|config| config.zones.iter()) } /// Iterate over the sled ids of sleds identified as Scrimlets @@ -543,17 +548,10 @@ pub struct SledAgent { pub usable_hardware_threads: u32, pub usable_physical_ram: ByteCount, pub reservoir_size: ByteCount, - pub omicron_zones: OmicronZonesConfig, pub disks: Vec, pub zpools: Vec, pub datasets: Vec, - /// As part of reconfigurator planning we need to know the control plane - /// disks configuration that the sled-agent has seen last. Specifically, - /// this allows the planner to know if a disk expungement has been seen by - /// the sled-agent, so that the planner can decommission the expunged disk. - /// - /// This field corresponds to the `generation` field in - /// `OmicronPhysicalDisksConfig` that is stored in the blueprint and sent to - /// the sled-agent via the executor over the internal API. - pub omicron_physical_disks_generation: Generation, + pub ledgered_sled_config: Option, + pub reconciler_status: ConfigReconcilerInventoryStatus, + pub last_reconciliation: Option, } diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 9185123b558..2636fb565cc 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3599,9 +3599,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( usable_physical_ram INT8 NOT NULL, reservoir_size INT8 CHECK (reservoir_size < usable_physical_ram) NOT NULL, - -- The last generation of OmicronPhysicalDisksConfig seen by the sled-agent - omicron_physical_disks_generation INT8 NOT NULL, - PRIMARY KEY (inv_collection_id, sled_id) ); diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 3a58e9583a8..d5910b2837e 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -1150,10 +1150,9 @@ impl ServicePortBuilder { #[cfg(test)] mod tests { use super::*; - use nexus_sled_agent_shared::inventory::OmicronZonesConfig; + use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use omicron_common::address::IpRange; use omicron_common::api::external::ByteCount; - use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::RackNetworkConfig; use oxnet::Ipv6Net; @@ -1369,14 +1368,12 @@ mod tests { usable_hardware_threads: 32, usable_physical_ram: ByteCount::try_from(1_u64 << 40).unwrap(), reservoir_size: ByteCount::try_from(1_u64 << 40).unwrap(), - omicron_zones: OmicronZonesConfig { - generation: OmicronZonesConfig::INITIAL_GENERATION, - zones: vec![], - }, disks, zpools: vec![], datasets: vec![], - omicron_physical_disks_generation: Generation::new(), + ledgered_sled_config: None, + reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, + last_reconciliation: None, }, is_scrimlet, )]; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1503ca5f268..ad1778501bb 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1657,8 +1657,8 @@ mod test { use crate::rack_setup::plan::service::{Plan as ServicePlan, SledInfo}; use nexus_reconfigurator_blippy::{Blippy, BlippyReportSortKey}; use nexus_sled_agent_shared::inventory::{ - Baseboard, Inventory, InventoryDisk, OmicronZoneType, - OmicronZonesConfig, SledRole, + Baseboard, ConfigReconcilerInventoryStatus, Inventory, InventoryDisk, + OmicronZoneType, SledRole, }; use omicron_common::{ address::{Ipv6Subnet, SLED_PREFIX, get_sled_address}, @@ -1685,10 +1685,6 @@ mod test { usable_hardware_threads: 32, usable_physical_ram: ByteCount::from_gibibytes_u32(16), reservoir_size: ByteCount::from_gibibytes_u32(0), - omicron_zones: OmicronZonesConfig { - generation: Generation::new(), - zones: vec![], - }, disks: (0..u2_count) .map(|i| InventoryDisk { identity: DiskIdentity { @@ -1707,7 +1703,9 @@ mod test { .collect(), zpools: vec![], datasets: vec![], - omicron_physical_disks_generation: Generation::new(), + ledgered_sled_config: None, + reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, + last_reconciliation: None, }, true, ) diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 032c7205729..977d6145e07 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -24,8 +24,9 @@ use dropshot::Body; use dropshot::HttpError; use futures::Stream; use nexus_sled_agent_shared::inventory::{ - Inventory, InventoryDataset, InventoryDisk, InventoryZpool, - OmicronSledConfig, OmicronSledConfigResult, OmicronZonesConfig, SledRole, + ConfigReconcilerInventoryStatus, Inventory, InventoryDataset, + InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronSledConfigResult, + OmicronZonesConfig, SledRole, }; use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, @@ -727,6 +728,22 @@ impl SledAgent { }; let storage = self.storage.lock(); + + let disks_config = storage + .omicron_physical_disks_list() + .context("list disks config")?; + let datasets_config = + storage.datasets_config_list().context("list datasets config")?; + let zones_config = self.fake_zones.lock().unwrap().clone(); + + let sled_config = OmicronSledConfig { + generation: zones_config.generation, + disks: disks_config.disks.into_iter().collect(), + datasets: datasets_config.datasets.into_values().collect(), + zones: zones_config.zones.into_iter().collect(), + remove_mupdate_override: None, + }; + Ok(Inventory { sled_id: self.id, sled_agent_address, @@ -741,7 +758,6 @@ impl SledAgent { self.config.hardware.reservoir_ram, ) .context("reservoir_size")?, - omicron_zones: self.fake_zones.lock().unwrap().clone(), disks: storage .physical_disks() .values() @@ -789,7 +805,9 @@ impl SledAgent { .collect::>() }) .unwrap_or_else(|_| vec![]), - omicron_physical_disks_generation: Generation::new(), + ledgered_sled_config: Some(sled_config), + reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, + last_reconciliation: None, }) } diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index a8b1f26384b..e09e14790ea 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -1619,7 +1619,7 @@ impl StorageInner { } pub fn omicron_physical_disks_list( - &mut self, + &self, ) -> Result { let Some(config) = self.config.as_ref() else { return Err(HttpError::for_not_found( diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 30d87583007..7e0c9226bf7 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -32,8 +32,9 @@ use futures::StreamExt; use futures::stream::FuturesUnordered; use illumos_utils::opte::PortManager; use nexus_sled_agent_shared::inventory::{ - Inventory, InventoryDataset, InventoryDisk, InventoryZpool, - OmicronSledConfig, OmicronSledConfigResult, OmicronZonesConfig, SledRole, + ConfigReconcilerInventoryStatus, Inventory, InventoryDataset, + InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronSledConfigResult, + OmicronZonesConfig, SledRole, }; use omicron_common::address::{ Ipv6Subnet, SLED_PREFIX, get_sled_address, get_switch_zone_address, @@ -74,6 +75,7 @@ use sled_hardware_types::Baseboard; use sled_hardware_types::underlay::BootstrapInterface; use sled_storage::manager::StorageHandle; use slog::Logger; +use slog_error_chain::InlineErrorChain; use sprockets_tls::keys::SprocketsConfig; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -294,16 +296,21 @@ pub enum InventoryError { // system. #[error(transparent)] BadByteCount(#[from] ByteCountRangeError), + #[error("failed to get current ledgered disks")] + GetDisksConfig(#[source] sled_storage::error::Error), + #[error("failed to get current ledgered datasets")] + GetDatasetsConfig(#[source] sled_storage::error::Error), } impl From for omicron_common::api::external::Error { fn from(inventory_error: InventoryError) -> Self { match inventory_error { - e @ InventoryError::BadByteCount(..) => { - omicron_common::api::external::Error::internal_error(&format!( - "{:#}", - e - )) + e @ (InventoryError::BadByteCount(..) + | InventoryError::GetDisksConfig(_) + | InventoryError::GetDatasetsConfig(_)) => { + omicron_common::api::external::Error::internal_error( + &InlineErrorChain::new(&e).to_string(), + ) } } } @@ -1355,10 +1362,17 @@ impl SledAgent { let mut disks = vec![]; let mut zpools = vec![]; let mut datasets = vec![]; - let (all_disks, omicron_zones) = tokio::join!( + let (all_disks, disks_config, datasets_config, omicron_zones) = tokio::join!( self.storage().get_latest_disks(), + self.storage().omicron_physical_disks_list(), + self.storage().datasets_config_list(), self.inner.services.omicron_zones_list() ); + let disks_config = + disks_config.map_err(InventoryError::GetDisksConfig)?; + let datasets_config = + datasets_config.map_err(InventoryError::GetDatasetsConfig)?; + for (identity, variant, slot, firmware) in all_disks.iter_all() { disks.push(InventoryDisk { identity: identity.clone(), @@ -1410,6 +1424,16 @@ impl SledAgent { datasets.extend(inv_props); } + // Reassemble our combined sled config from its separate pieces. (This + // will go away once we start ledgering the config as a single unit.) + let sled_config = OmicronSledConfig { + generation: omicron_zones.generation, + disks: disks_config.disks.into_iter().collect(), + datasets: datasets_config.datasets.into_values().collect(), + zones: omicron_zones.zones.into_iter().collect(), + remove_mupdate_override: None, + }; + Ok(Inventory { sled_id, sled_agent_address, @@ -1418,11 +1442,14 @@ impl SledAgent { usable_hardware_threads, usable_physical_ram: ByteCount::try_from(usable_physical_ram)?, reservoir_size, - omicron_zones, disks, zpools, datasets, - omicron_physical_disks_generation: *all_disks.generation(), + // These fields will come from the reconciler once it's integrated. + // For now, we can report our ledgered config but nothing else. + ledgered_sled_config: Some(sled_config), + reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, + last_reconciliation: None, }) } From d507eecf2b5183854e337df389511331b6eecf21 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 16 May 2025 12:00:52 -0400 Subject: [PATCH 02/22] update db schema and model for new inventory fields --- nexus/db-model/src/inventory.rs | 194 ++++++++++++++++++++++++-------- nexus/db-schema/src/enums.rs | 1 + nexus/db-schema/src/schema.rs | 118 ++++++++++++++++--- schema/crdb/dbinit.sql | 191 +++++++++++++++++++++++++++---- 4 files changed, 417 insertions(+), 87 deletions(-) diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index a5aefbf1f60..a50ed33c243 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -4,6 +4,7 @@ //! Types for representing the hardware/software inventory in the database +use crate::Generation; use crate::PhysicalDiskKind; use crate::omicron_zone_config::{self, OmicronZoneNic}; use crate::typed_uuid::DbTypedUuid; @@ -24,10 +25,15 @@ use diesel::{serialize, sql_types}; use ipnetwork::IpNetwork; use nexus_db_schema::schema::{ hw_baseboard_id, inv_caboose, inv_clickhouse_keeper_membership, - inv_collection, inv_collection_error, inv_dataset, inv_nvme_disk_firmware, - inv_omicron_zone, inv_omicron_zone_nic, inv_physical_disk, - inv_root_of_trust, inv_root_of_trust_page, inv_service_processor, - inv_sled_agent, inv_zpool, sw_caboose, sw_root_of_trust_page, + inv_collection, inv_collection_error, inv_dataset, + inv_last_reconciliation_dataset_result, + inv_last_reconciliation_disk_result, inv_last_reconciliation_zone_result, + inv_nvme_disk_firmware, inv_omicron_sled_config, + inv_omicron_sled_config_dataset, inv_omicron_sled_config_disk, + inv_omicron_sled_config_zone, inv_omicron_sled_config_zone_nic, + inv_physical_disk, inv_root_of_trust, inv_root_of_trust_page, + inv_service_processor, inv_sled_agent, inv_zpool, sw_caboose, + sw_root_of_trust_page, }; use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneDataset, OmicronZoneImageSource, @@ -41,6 +47,7 @@ use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::DatasetKind; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::MupdateOverrideKind; use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolKind; @@ -792,12 +799,61 @@ pub struct InvSledAgent { pub usable_hardware_threads: SqlU32, pub usable_physical_ram: ByteCount, pub reservoir_size: ByteCount, + // Soft foreign key to an `InvOmicronSledConfig` + pub ledgered_sled_config: Option, + + // Soft foreign key to an `InvOmicronSledConfig`. May or may not be the same + // as `ledgered_sled_config`. + pub last_reconciliation_sled_config: Option, + + #[diesel(embed)] + pub reconciler_status: InvConfigReconcilerStatus, } +/// See [`nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus`]. +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_sled_agent)] +pub struct InvConfigReconcilerStatus { + pub reconciler_status_kind: InvConfigReconcilerStatusKind, + // Soft foreign key to an `InvOmicronSledConfig`. May or may not be the same + // as either of the other sled config keys above. Only populated if + // `reconciler_status_kind` is `Running`. + pub reconciler_status_sled_config: Option, + // Interpretation varies based on `reconciler_status_kind`: + // + // * `NotYetRun` - always `None` + // * `Running` - `started_at` time + // * `Idle` - `completed_at` time + pub reconciler_status_timestamp: Option>, + // Interpretation varies based on `reconciler_status_kind`: + // + // * `NotYetRun` - always `None` + // * `Running` - `running_for` duration + // * `Idle` - `ran_for` duration + pub reconciler_status_duration_secs: Option, +} + +// See [`nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus`]. +impl_enum_type!( + InvConfigReconcilerStatusKindEnum: + + #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq)] + pub enum InvConfigReconcilerStatusKind; + + // Enum values + NotYetRun => b"not-yet-run" + Running => b"running" + Idle => b"idle" +); + impl InvSledAgent { + /// Construct a new `InvSledAgent`. pub fn new_without_baseboard( collection_id: CollectionUuid, sled_agent: &nexus_types::inventory::SledAgent, + ledgered_sled_config: Option, + last_reconciliation_sled_config: Option, + reconciler_status: InvConfigReconcilerStatus, ) -> Result { // It's irritating to have to check this case at runtime. The challenge // is that if this sled agent does have a baseboard id, we don't know @@ -835,11 +891,41 @@ impl InvSledAgent { sled_agent.usable_physical_ram, ), reservoir_size: ByteCount::from(sled_agent.reservoir_size), + ledgered_sled_config, + last_reconciliation_sled_config, + reconciler_status, }) } } } +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_last_reconciliation_disk_result)] +pub struct InvLastReconciliationDiskResult { + pub inv_collection_id: DbTypedUuid, + pub sled_id: DbTypedUuid, + pub disk_id: DbTypedUuid, + pub error_message: Option, +} + +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_last_reconciliation_dataset_result)] +pub struct InvLastReconciliationDatasetResult { + pub inv_collection_id: DbTypedUuid, + pub sled_id: DbTypedUuid, + pub dataset_id: DbTypedUuid, + pub error_message: Option, +} + +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_last_reconciliation_zone_result)] +pub struct InvLastReconciliationZoneResult { + pub inv_collection_id: DbTypedUuid, + pub sled_id: DbTypedUuid, + pub zone_id: DbTypedUuid, + pub error_message: Option, +} + /// See [`nexus_types::inventory::PhysicalDisk`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_physical_disk)] @@ -1159,38 +1245,18 @@ impl From for nexus_types::inventory::Dataset { } } -/* -/// Information about a sled's Omicron zones, part of -/// [`nexus_types::inventory::SledAgent`]. -/// -/// TODO: This table is vestigial and can be combined with `InvSledAgent`. See -/// [issue #6770](https://github.com/oxidecomputer/omicron/issues/6770). +/// Top-level information contained in an +/// [`nexus_sled_agent_shared::inventory::OmicronSledConfig`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] -#[diesel(table_name = inv_sled_omicron_zones)] -pub struct InvSledOmicronZones { +#[diesel(table_name = inv_omicron_sled_config)] +pub struct InvOmicronSledConfig { pub inv_collection_id: DbTypedUuid, - pub time_collected: DateTime, - pub source: String, pub sled_id: DbTypedUuid, + pub id: Uuid, pub generation: Generation, + pub remove_mupdate_override: DbTypedUuid, } -impl InvSledOmicronZones { - pub fn new( - inv_collection_id: CollectionUuid, - sled_agent: &nexus_types::inventory::SledAgent, - ) -> InvSledOmicronZones { - InvSledOmicronZones { - inv_collection_id: inv_collection_id.into(), - time_collected: sled_agent.time_collected, - source: sled_agent.source.clone(), - sled_id: sled_agent.sled_id.into(), - generation: Generation(sled_agent.omicron_zones.generation), - } - } -} -*/ - impl_enum_type!( ZoneTypeEnum: @@ -1272,11 +1338,43 @@ impl From for ZoneType { } } +/// See [`omicron_common::disk::OmicronPhysicalDiskConfig`]. +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_omicron_sled_config_disk)] +pub struct InvOmicronSledConfigDisk { + pub sled_config_id: Uuid, + pub sled_id: DbTypedUuid, + pub id: DbTypedUuid, + + pub vendor: String, + pub serial: String, + pub model: String, + + pub pool_id: Uuid, +} + +/// See [`omicron_common::disk::DatasetConfig`]. +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_omicron_sled_config_dataset)] +pub struct InvOmicronSledConfigDataset { + pub sled_config_id: Uuid, + pub sled_id: DbTypedUuid, + pub id: DbTypedUuid, + + pub pool_id: DbTypedUuid, + pub kind: crate::DatasetKind, + zone_name: Option, + + pub quota: Option, + pub reservation: Option, + pub compression: String, +} + /// See [`nexus_sled_agent_shared::inventory::OmicronZoneConfig`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] -#[diesel(table_name = inv_omicron_zone)] -pub struct InvOmicronZone { - pub inv_collection_id: DbTypedUuid, +#[diesel(table_name = inv_omicron_sled_config_zone)] +pub struct InvOmicronSledConfigZone { + pub sled_config_id: Uuid, pub sled_id: DbTypedUuid, pub id: DbTypedUuid, pub zone_type: ZoneType, @@ -1299,18 +1397,18 @@ pub struct InvOmicronZone { pub filesystem_pool: Option>, } -impl InvOmicronZone { +impl InvOmicronSledConfigZone { pub fn new( - inv_collection_id: CollectionUuid, + sled_config_id: Uuid, sled_id: SledUuid, zone: &OmicronZoneConfig, - ) -> Result { + ) -> Result { // Create a dummy record to start, then fill in the rest // according to the zone type - let mut inv_omicron_zone = InvOmicronZone { + let mut inv_omicron_zone = InvOmicronSledConfigZone { // Fill in the known fields that don't require inspecting // `zone.zone_type` - inv_collection_id: inv_collection_id.into(), + sled_config_id, sled_id: sled_id.into(), id: zone.id.into(), filesystem_pool: zone @@ -1488,7 +1586,7 @@ impl InvOmicronZone { pub fn into_omicron_zone_config( self, - nic_row: Option, + nic_row: Option, ) -> Result { // Build up a set of common fields for our `OmicronZoneType`s // @@ -1638,9 +1736,9 @@ impl InvOmicronZone { } #[derive(Queryable, Clone, Debug, Selectable, Insertable)] -#[diesel(table_name = inv_omicron_zone_nic)] -pub struct InvOmicronZoneNic { - inv_collection_id: DbTypedUuid, +#[diesel(table_name = inv_omicron_sled_config_zone_nic)] +pub struct InvOmicronSledConfigZoneNic { + sled_config_id: Uuid, pub id: Uuid, name: Name, ip: IpNetwork, @@ -1651,8 +1749,8 @@ pub struct InvOmicronZoneNic { slot: SqlU8, } -impl From for OmicronZoneNic { - fn from(value: InvOmicronZoneNic) -> Self { +impl From for OmicronZoneNic { + fn from(value: InvOmicronSledConfigZoneNic) -> Self { OmicronZoneNic { id: value.id, name: value.name, @@ -1666,17 +1764,17 @@ impl From for OmicronZoneNic { } } -impl InvOmicronZoneNic { +impl InvOmicronSledConfigZoneNic { pub fn new( - inv_collection_id: CollectionUuid, + sled_config_id: Uuid, zone: &OmicronZoneConfig, - ) -> Result, anyhow::Error> { + ) -> Result, anyhow::Error> { let Some(nic) = zone.zone_type.service_vnic() else { return Ok(None); }; let nic = OmicronZoneNic::new(zone.id, nic)?; Ok(Some(Self { - inv_collection_id: inv_collection_id.into(), + sled_config_id, id: nic.id, name: nic.name, ip: nic.ip, diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 79d312c8b64..b238535f81e 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -43,6 +43,7 @@ define_enums! { InstanceAutoRestartPolicyEnum => "instance_auto_restart", InstanceStateEnum => "instance_state_v2", InstanceIntendedStateEnum => "instance_intended_state", + InvConfigReconcilerStatusKindEnum => "inv_config_reconciler_status_kind", IpAttachStateEnum => "ip_attach_state", IpKindEnum => "ip_kind", IpPoolResourceTypeEnum => "ip_pool_resource_type", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 105df2cc45d..fce13f8a1fd 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1582,6 +1582,45 @@ table! { usable_hardware_threads -> Int8, usable_physical_ram -> Int8, reservoir_size -> Int8, + + ledgered_sled_config -> Nullable, + last_reconciliation_sled_config -> Nullable, + reconciler_status_kind -> crate::enums::InvConfigReconcilerStatusKindEnum, + reconciler_status_sled_config -> Nullable, + reconciler_status_timestamp -> Nullable, + reconciler_status_duration_secs -> Nullable, + } +} + +table! { + inv_last_reconciliation_disk_result (inv_collection_id, sled_id, disk_id) { + inv_collection_id -> Uuid, + sled_id -> Uuid, + disk_id -> Uuid, + + error_message -> Nullable, + } +} + +table! { + inv_last_reconciliation_dataset_result + (inv_collection_id, sled_id, dataset_id) + { + inv_collection_id -> Uuid, + sled_id -> Uuid, + dataset_id -> Uuid, + + error_message -> Nullable, + } +} + +table! { + inv_last_reconciliation_zone_result (inv_collection_id, sled_id, zone_id) { + inv_collection_id -> Uuid, + sled_id -> Uuid, + zone_id -> Uuid, + + error_message -> Nullable, } } @@ -1639,20 +1678,20 @@ table! { } table! { - inv_sled_omicron_zones (inv_collection_id, sled_id) { + inv_omicron_sled_config (inv_collection_id, sled_id, id) { inv_collection_id -> Uuid, - time_collected -> Timestamptz, - source -> Text, sled_id -> Uuid, + id -> Uuid, generation -> Int8, + remove_mupdate_override -> Nullable, } } table! { - inv_omicron_zone (inv_collection_id, id) { - inv_collection_id -> Uuid, + inv_omicron_sled_config_zone (sled_config_id, id) { sled_id -> Uuid, + sled_config_id -> Uuid, id -> Uuid, zone_type -> crate::enums::ZoneTypeEnum, @@ -1678,8 +1717,8 @@ table! { } table! { - inv_omicron_zone_nic (inv_collection_id, id) { - inv_collection_id -> Uuid, + inv_omicron_sled_config_zone_nic (sled_config_id, id) { + sled_config_id -> Uuid, id -> Uuid, name -> Text, ip -> Inet, @@ -1691,6 +1730,36 @@ table! { } } +table! { + inv_omicron_sled_config_dataset (sled_config_id, id) { + sled_config_id -> Uuid, + sled_id -> Uuid, + id -> Uuid, + + pool_id -> Uuid, + kind -> crate::enums::DatasetKindEnum, + zone_name -> Nullable, + + quota -> Nullable, + reservation -> Nullable, + compression -> Text, + } +} + +table! { + inv_omicron_sled_config_disk (sled_config_id, id) { + sled_config_id -> Uuid, + sled_id -> Uuid, + id -> Uuid, + + vendor -> Text, + serial -> Text, + model -> Text, + + pool_id -> Uuid, + } +} + table! { inv_clickhouse_keeper_membership (inv_collection_id, queried_keeper_id) { inv_collection_id -> Uuid, @@ -2151,14 +2220,35 @@ allow_tables_to_appear_in_same_query!(external_ip, project); allow_tables_to_appear_in_same_query!(external_ip, ip_pool_resource); allow_tables_to_appear_in_same_query!(external_ip, vmm); allow_tables_to_appear_in_same_query!(external_ip, network_interface); -allow_tables_to_appear_in_same_query!(external_ip, inv_omicron_zone); -allow_tables_to_appear_in_same_query!(external_ip, inv_omicron_zone_nic); -allow_tables_to_appear_in_same_query!(inv_omicron_zone, inv_omicron_zone_nic); -allow_tables_to_appear_in_same_query!(network_interface, inv_omicron_zone); -allow_tables_to_appear_in_same_query!(network_interface, inv_omicron_zone_nic); +allow_tables_to_appear_in_same_query!( + external_ip, + inv_omicron_sled_config_zone +); +allow_tables_to_appear_in_same_query!( + external_ip, + inv_omicron_sled_config_zone_nic +); +allow_tables_to_appear_in_same_query!( + inv_omicron_sled_config_zone, + inv_omicron_sled_config_zone_nic +); +allow_tables_to_appear_in_same_query!( + network_interface, + inv_omicron_sled_config_zone +); +allow_tables_to_appear_in_same_query!( + network_interface, + inv_omicron_sled_config_zone_nic +); allow_tables_to_appear_in_same_query!(network_interface, inv_collection); -allow_tables_to_appear_in_same_query!(inv_omicron_zone, inv_collection); -allow_tables_to_appear_in_same_query!(inv_omicron_zone_nic, inv_collection); +allow_tables_to_appear_in_same_query!( + inv_omicron_sled_config_zone, + inv_collection +); +allow_tables_to_appear_in_same_query!( + inv_omicron_sled_config_zone_nic, + inv_collection +); allow_tables_to_appear_in_same_query!(external_ip, inv_collection); allow_tables_to_appear_in_same_query!(external_ip, internet_gateway); allow_tables_to_appear_in_same_query!(external_ip, internet_gateway_ip_pool); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 2636fb565cc..73eb1857794 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3569,6 +3569,13 @@ CREATE TYPE IF NOT EXISTS omicron.public.sled_role AS ENUM ( 'gimlet' ); +CREATE TYPE IF NOT EXISTS omicron.public.inv_config_reconciler_status_kind +AS ENUM ( + 'not-yet-run', + 'running', + 'idle' +); + -- observations from and about sled agents CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( -- where this observation came from @@ -3599,6 +3606,44 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( usable_physical_ram INT8 NOT NULL, reservoir_size INT8 CHECK (reservoir_size < usable_physical_ram) NOT NULL, + -- Currently-ledgered `OmicronSledConfig` + -- (foreign key into `inv_omicron_sled_config` table) + -- This is optional because newly-added sleds don't yet have a config. + ledgered_sled_config UUID, + + -- Most-recently-reconciled `OmicronSledConfig` + -- (foreign key into `inv_omicron_sled_config` table) + -- This is optional because the reconciler may not have run yet + last_reconciliation_sled_config UUID, + + -- Columns making up the status of the config reconciler. + reconciler_status_kind inv_config_reconciler_status_kind NOT NULL, + -- (foreign key into `inv_omicron_sled_config` table) + -- only present if `inv_config_reconciler_status_kind = 'running'` + reconciler_status_sled_config UUID CHECK ( + (inv_config_reconciler_status_kind = 'running' + AND reconciler_status_sled_config IS NOT NULL) + OR + (inv_config_reconciler_status_kind != 'running' + AND reconciler_status_sled_config IS NULL) + ), + -- only present if `inv_config_reconciler_status_kind != 'not-yet-run'` + reconciler_status_timestamp TIMESTAMPTZ CHECK ( + (inv_config_reconciler_status_kind = 'not-yet-run' + AND reconciler_status_sled_config IS NULL) + OR + (inv_config_reconciler_status_kind != 'not-yet-run' + AND reconciler_status_sled_config IS NOT NULL) + ), + -- only present if `inv_config_reconciler_status_kind != 'not-yet-run'` + reconciler_status_duration_secs FLOAT CHECK ( + (inv_config_reconciler_status_kind = 'not-yet-run' + AND reconciler_status_sled_config IS NULL) + OR + (inv_config_reconciler_status_kind != 'not-yet-run' + AND reconciler_status_sled_config IS NOT NULL) + ), + PRIMARY KEY (inv_collection_id, sled_id) ); @@ -3702,25 +3747,81 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_dataset ( PRIMARY KEY (inv_collection_id, sled_id, name) ); --- TODO: This table is vestigial and can be combined with `inv_sled_agent`. See --- https://github.com/oxidecomputer/omicron/issues/6770. -CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_omicron_zones ( +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config ( -- where this observation came from -- (foreign key into `inv_collection` table) inv_collection_id UUID NOT NULL, - -- when this observation was made - time_collected TIMESTAMPTZ NOT NULL, - -- URL of the sled agent that reported this data - source TEXT NOT NULL, -- unique id for this sled (should be foreign keys into `sled` table, though -- it's conceivable a sled will report an id that we don't know about) sled_id UUID NOT NULL, - -- OmicronZonesConfig generation reporting these zones + -- ID of this sled config. A given inventory report from a sled agent may + -- contain 0-3 sled configs, so we generate these IDs on insertion and + -- record them as the foreign keys in `inv_sled_agent`. + id UUID NOT NULL, + + -- config generation generation INT8 NOT NULL, - PRIMARY KEY (inv_collection_id, sled_id) + -- remove mupdate override ID, if set + remove_mupdate_override UUID, + + PRIMARY KEY (inv_collection_id, sled_id, id) +); + +CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_disk_result ( + -- where this observation came from + -- (foreign key into `inv_collection` table) + inv_collection_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a sled will report an id that we don't know about) + sled_id UUID NOT NULL, + + -- unique id for this physical disk + disk_id UUID NOT NULL, + + -- error message; if NULL, an "ok" result + error_message TEXT, + + PRIMARY KEY (inv_collection_id, sled_id, disk_id) +); + +CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_dataset_result ( + -- where this observation came from + -- (foreign key into `inv_collection` table) + inv_collection_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a sled will report an id that we don't know about) + sled_id UUID NOT NULL, + + -- unique id for this dataset + dataset_id UUID NOT NULL, + + -- error message; if NULL, an "ok" result + error_message TEXT, + + PRIMARY KEY (inv_collection_id, sled_id, dataset_id) +); + +CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_zone_result ( + -- where this observation came from + -- (foreign key into `inv_collection` table) + inv_collection_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a sled will report an id that we don't know about) + sled_id UUID NOT NULL, + + -- unique id for this zone + zone_id UUID NOT NULL, + + -- error message; if NULL, an "ok" result + error_message TEXT, + + PRIMARY KEY (inv_collection_id, sled_id, zone_id) ); CREATE TYPE IF NOT EXISTS omicron.public.zone_type AS ENUM ( @@ -3738,11 +3839,11 @@ CREATE TYPE IF NOT EXISTS omicron.public.zone_type AS ENUM ( 'oximeter' ); --- observations from sled agents about Omicron-managed zones -CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( +-- `zones` portion of an `OmicronSledConfig` observed from sled-agent +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( -- where this observation came from - -- (foreign key into `inv_collection` table) - inv_collection_id UUID NOT NULL, + -- (foreign key into `inv_omicron_sled_config` table) + sled_config_id UUID NOT NULL, -- unique id for this sled (should be foreign keys into `sled` table, though -- it's conceivable a sled will report an id that we don't know about) @@ -3778,7 +3879,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( dataset_zpool_name TEXT, -- Zones with external IPs have an associated NIC and sockaddr for listening - -- (first is a foreign key into `inv_omicron_zone_nic`) + -- (first is a foreign key into `inv_omicron_sled_config_zone_nic`) nic_id UUID, -- Properties for internal DNS servers @@ -3807,14 +3908,21 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( -- Eventually, that nullability should be removed. filesystem_pool UUID, - PRIMARY KEY (inv_collection_id, id) + PRIMARY KEY (sled_config_id, id) ); -CREATE INDEX IF NOT EXISTS inv_omicron_zone_nic_id ON omicron.public.inv_omicron_zone - (nic_id) STORING (sled_id, primary_service_ip, second_service_ip, snat_ip); +CREATE INDEX IF NOT EXISTS inv_omicron_sled_config_zone_nic_id + ON omicron.public.inv_omicron_sled_config_zone (nic_id) + STORING ( + sled_id, + sled_config_id, + primary_service_ip, + second_service_ip, + snat_ip + ); -CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone_nic ( - inv_collection_id UUID NOT NULL, +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone_nic ( + sled_config_id UUID NOT NULL, id UUID NOT NULL, name TEXT NOT NULL, ip INET NOT NULL, @@ -3824,7 +3932,45 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone_nic ( is_primary BOOLEAN NOT NULL, slot INT2 NOT NULL, - PRIMARY KEY (inv_collection_id, id) + PRIMARY KEY (sled_config_id, id) +); + +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_dataset ( + -- foreign key into the `inv_omicron_sled_config` table + sled_config_id UUID NOT NULL, + sled_id UUID NOT NULL, + id UUID NOT NULL, + + pool_id UUID NOT NULL, + kind omicron.public.dataset_kind NOT NULL, + -- Only valid if kind = zone + zone_name TEXT, + + quota INT8, + reservation INT8, + compression TEXT NOT NULL, + + CONSTRAINT zone_name_for_zone_kind CHECK ( + (kind != 'zone') OR + (kind = 'zone' AND zone_name IS NOT NULL) + ), + + PRIMARY KEY (sled_config_id, id) +); + +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_disk ( + -- foreign key into the `inv_omicron_sled_config` table + sled_config_id UUID NOT NULL, + sled_id UUID NOT NULL, + id UUID NOT NULL, + + vendor TEXT NOT NULL, + serial TEXT NOT NULL, + model TEXT NOT NULL, + + pool_id UUID NOT NULL, + + PRIMARY KEY (sled_config_id, id) ); CREATE TABLE IF NOT EXISTS omicron.public.inv_clickhouse_keeper_membership ( @@ -4030,11 +4176,6 @@ CREATE TYPE IF NOT EXISTS omicron.public.bp_zone_image_source AS ENUM ( ); -- description of omicron zones specified in a blueprint --- --- This is currently identical to `inv_omicron_zone`, except that the foreign --- keys reference other blueprint tables intead of inventory tables. We expect --- their sameness to diverge over time as either inventory or blueprints (or --- both) grow context-specific properties. CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( -- foreign key into the `blueprint` table blueprint_id UUID NOT NULL, From 655528b186058a08f25ebe97b7c06067c0d1a474 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 16 May 2025 17:15:18 -0400 Subject: [PATCH 03/22] wip: first pass at inserts --- nexus/db-model/src/inventory.rs | 134 ++++- .../db-queries/src/db/datastore/inventory.rs | 460 +++++++++++++++++- 2 files changed, 569 insertions(+), 25 deletions(-) diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index a50ed33c243..601a96ebd87 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -36,18 +36,22 @@ use nexus_db_schema::schema::{ sw_root_of_trust_page, }; use nexus_sled_agent_shared::inventory::{ - OmicronZoneConfig, OmicronZoneDataset, OmicronZoneImageSource, - OmicronZoneType, + ConfigReconcilerInventoryResult, OmicronSledConfig, OmicronZoneConfig, + OmicronZoneDataset, OmicronZoneImageSource, OmicronZoneType, }; use nexus_types::inventory::{ BaseboardId, Caboose, Collection, NvmeFirmware, PowerState, RotPage, RotSlot, }; use omicron_common::api::internal::shared::NetworkInterface; +use omicron_common::disk::DatasetConfig; +use omicron_common::disk::OmicronPhysicalDiskConfig; use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::DatasetKind; +use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::MupdateOverrideKind; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolKind; @@ -908,15 +912,55 @@ pub struct InvLastReconciliationDiskResult { pub error_message: Option, } +impl InvLastReconciliationDiskResult { + pub fn new( + inv_collection_id: CollectionUuid, + sled_id: SledUuid, + disk_id: PhysicalDiskUuid, + result: ConfigReconcilerInventoryResult, + ) -> Self { + let error_message = match result { + ConfigReconcilerInventoryResult::Ok => None, + ConfigReconcilerInventoryResult::Err { message } => Some(message), + }; + Self { + inv_collection_id: inv_collection_id.into(), + sled_id: sled_id.into(), + disk_id: disk_id.into(), + error_message, + } + } +} + #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_last_reconciliation_dataset_result)] pub struct InvLastReconciliationDatasetResult { pub inv_collection_id: DbTypedUuid, pub sled_id: DbTypedUuid, - pub dataset_id: DbTypedUuid, + pub dataset_id: DbTypedUuid, pub error_message: Option, } +impl InvLastReconciliationDatasetResult { + pub fn new( + inv_collection_id: CollectionUuid, + sled_id: SledUuid, + dataset_id: DatasetUuid, + result: ConfigReconcilerInventoryResult, + ) -> Self { + let error_message = match result { + ConfigReconcilerInventoryResult::Ok => None, + ConfigReconcilerInventoryResult::Err { message } => Some(message), + }; + Self { + inv_collection_id: inv_collection_id.into(), + sled_id: sled_id.into(), + dataset_id: dataset_id.into(), + error_message, + } + } +} + #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_last_reconciliation_zone_result)] pub struct InvLastReconciliationZoneResult { @@ -926,6 +970,26 @@ pub struct InvLastReconciliationZoneResult { pub error_message: Option, } +impl InvLastReconciliationZoneResult { + pub fn new( + inv_collection_id: CollectionUuid, + sled_id: SledUuid, + zone_id: OmicronZoneUuid, + result: ConfigReconcilerInventoryResult, + ) -> Self { + let error_message = match result { + ConfigReconcilerInventoryResult::Ok => None, + ConfigReconcilerInventoryResult::Err { message } => Some(message), + }; + Self { + inv_collection_id: inv_collection_id.into(), + sled_id: sled_id.into(), + zone_id: zone_id.into(), + error_message, + } + } +} + /// See [`nexus_types::inventory::PhysicalDisk`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_physical_disk)] @@ -1245,8 +1309,7 @@ impl From for nexus_types::inventory::Dataset { } } -/// Top-level information contained in an -/// [`nexus_sled_agent_shared::inventory::OmicronSledConfig`]. +/// Top-level information contained in an [`OmicronSledConfig`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_sled_config)] pub struct InvOmicronSledConfig { @@ -1254,7 +1317,26 @@ pub struct InvOmicronSledConfig { pub sled_id: DbTypedUuid, pub id: Uuid, pub generation: Generation, - pub remove_mupdate_override: DbTypedUuid, + pub remove_mupdate_override: Option>, +} + +impl InvOmicronSledConfig { + pub fn new( + inv_collection_id: CollectionUuid, + sled_id: SledUuid, + id: Uuid, + config: &OmicronSledConfig, + ) -> Self { + Self { + inv_collection_id: inv_collection_id.into(), + sled_id: sled_id.into(), + id, + generation: Generation(config.generation), + remove_mupdate_override: config + .remove_mupdate_override + .map(From::from), + } + } } impl_enum_type!( @@ -1350,7 +1432,25 @@ pub struct InvOmicronSledConfigDisk { pub serial: String, pub model: String, - pub pool_id: Uuid, + pub pool_id: DbTypedUuid, +} + +impl InvOmicronSledConfigDisk { + pub fn new( + sled_config_id: Uuid, + sled_id: SledUuid, + disk_config: OmicronPhysicalDiskConfig, + ) -> Self { + Self { + sled_config_id, + sled_id: sled_id.into(), + id: disk_config.id.into(), + vendor: disk_config.identity.vendor, + serial: disk_config.identity.serial, + model: disk_config.identity.model, + pool_id: disk_config.pool_id.into(), + } + } } /// See [`omicron_common::disk::DatasetConfig`]. @@ -1370,6 +1470,26 @@ pub struct InvOmicronSledConfigDataset { pub compression: String, } +impl InvOmicronSledConfigDataset { + pub fn new( + sled_config_id: Uuid, + sled_id: SledUuid, + dataset_config: &DatasetConfig, + ) -> Self { + Self { + sled_config_id, + sled_id: sled_id.into(), + id: dataset_config.id.into(), + pool_id: dataset_config.name.pool().id().into(), + kind: dataset_config.name.kind().into(), + zone_name: dataset_config.name.kind().zone_name().map(String::from), + quota: dataset_config.inner.quota.map(|q| q.into()), + reservation: dataset_config.inner.reservation.map(|r| r.into()), + compression: dataset_config.inner.compression.to_string(), + } + } +} + /// See [`nexus_sled_agent_shared::inventory::OmicronZoneConfig`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_sled_config_zone)] diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index fa02c88cbee..b4e33cb2e51 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -35,8 +35,18 @@ use nexus_db_model::InvCaboose; use nexus_db_model::InvClickhouseKeeperMembership; use nexus_db_model::InvCollection; use nexus_db_model::InvCollectionError; +use nexus_db_model::InvConfigReconcilerStatus; +use nexus_db_model::InvConfigReconcilerStatusKind; use nexus_db_model::InvDataset; +use nexus_db_model::InvLastReconciliationDatasetResult; +use nexus_db_model::InvLastReconciliationDiskResult; +use nexus_db_model::InvLastReconciliationZoneResult; use nexus_db_model::InvNvmeDiskFirmware; +use nexus_db_model::InvOmicronSledConfig; +use nexus_db_model::InvOmicronSledConfigDataset; +use nexus_db_model::InvOmicronSledConfigDisk; +use nexus_db_model::InvOmicronSledConfigZone; +use nexus_db_model::InvOmicronSledConfigZoneNic; use nexus_db_model::InvPhysicalDisk; use nexus_db_model::InvRootOfTrust; use nexus_db_model::InvRotPage; @@ -51,17 +61,21 @@ use nexus_db_model::SqlU32; use nexus_db_model::SwCaboose; use nexus_db_model::SwRotPage; use nexus_db_model::to_db_typed_uuid; -use nexus_db_schema::enums::CabooseWhichEnum; use nexus_db_schema::enums::HwPowerStateEnum; use nexus_db_schema::enums::HwRotSlotEnum; use nexus_db_schema::enums::RotImageErrorEnum; use nexus_db_schema::enums::RotPageWhichEnum; use nexus_db_schema::enums::SledRoleEnum; use nexus_db_schema::enums::SpTypeEnum; +use nexus_db_schema::enums::{ + CabooseWhichEnum, InvConfigReconcilerStatusKindEnum, +}; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use nexus_types::inventory::PhysicalDiskFirmware; +use nexus_types::inventory::SledAgent; use omicron_common::api::external::Error; use omicron_common::api::external::InternalContext; use omicron_common::api::external::LookupType; @@ -221,6 +235,23 @@ impl DataStore { }) .collect(); + // Build up a list of `OmicronSledConfig`s we need to insert (each sled + // has 0-3), all of their zones, zone NICs, datasets, and disks, and + // also collect the config reconciler properties for each sled. We need + // these to construct `InvSledAgent` instances below. + let ConfigReconcilerRows { + sled_configs: omicron_sled_configs, + disks: omicron_sled_config_disks, + datasets: omicron_sled_config_datasets, + zones: omicron_sled_config_zones, + zone_nics: omicron_sled_config_zone_nics, + disk_results: reconciler_disk_results, + dataset_results: reconciler_dataset_results, + zone_results: reconciler_zone_results, + mut config_reconciler_fields_by_sled, + } = ConfigReconcilerRows::new(collection_id, collection) + .map_err(|e| Error::internal_error(&format!("{e:#}")))?; + // Partition the sled agents into those with an associated baseboard id // and those without one. We handle these pretty differently. let (sled_agents_baseboards, sled_agents_no_baseboards): ( @@ -234,26 +265,24 @@ impl DataStore { .into_iter() .map(|sled_agent| { assert!(sled_agent.baseboard_id.is_none()); - InvSledAgent::new_without_baseboard(collection_id, sled_agent) - .map_err(|e| Error::internal_error(&e.to_string())) + let ConfigReconcilerFields { + ledgered_sled_config, + last_reconciliation_sled_config, + reconciler_status, + } = config_reconciler_fields_by_sled + .remove(&sled_agent.sled_id) + .expect("all sled IDs should exist"); + InvSledAgent::new_without_baseboard( + collection_id, + sled_agent, + ledgered_sled_config, + last_reconciliation_sled_config, + reconciler_status, + ) + .map_err(|e| Error::internal_error(&e.to_string())) }) .collect::, Error>>()?; - /* - let omicron_zone_nics = collection - .sled_agents - .values() - .flat_map(|sled_agent| { - sled_agent.omicron_zones.zones.iter().filter_map(|found_zone| { - InvOmicronZoneNic::new(collection_id, found_zone) - .with_context(|| format!("zone {:?}", found_zone.id)) - .map_err(|e| Error::internal_error(&format!("{:#}", e))) - .transpose() - }) - }) - .collect::, _>>()?; - */ - let mut inv_clickhouse_keeper_memberships = Vec::new(); for membership in &collection.clickhouse_keeper_cluster_membership { inv_clickhouse_keeper_memberships.push( @@ -852,6 +881,158 @@ impl DataStore { } } + // Insert rows for the all the sled configs we found. + { + use nexus_db_schema::schema::inv_omicron_sled_config::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut configs = omicron_sled_configs.into_iter(); + loop { + let some_configs = + configs.by_ref().take(batch_size).collect::>(); + if some_configs.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_omicron_sled_config) + .values(some_configs) + .execute_async(&conn) + .await?; + } + } + + // Insert rows for the all the sled configs' disks we found. + { + use nexus_db_schema::schema::inv_omicron_sled_config_disk::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut disks = omicron_sled_config_disks.into_iter(); + loop { + let some_disks = + disks.by_ref().take(batch_size).collect::>(); + if some_disks.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_omicron_sled_config_disk) + .values(some_disks) + .execute_async(&conn) + .await?; + } + } + + // Insert rows for the all the sled configs' datasets we found. + { + use nexus_db_schema::schema::inv_omicron_sled_config_dataset::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut datasets = omicron_sled_config_datasets.into_iter(); + loop { + let some_datasets = + datasets.by_ref().take(batch_size).collect::>(); + if some_datasets.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_omicron_sled_config_dataset) + .values(some_datasets) + .execute_async(&conn) + .await?; + } + } + + // Insert rows for the all the sled configs' zones we found. + { + use nexus_db_schema::schema::inv_omicron_sled_config_zone::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut zones = omicron_sled_config_zones.into_iter(); + loop { + let some_zones = + zones.by_ref().take(batch_size).collect::>(); + if some_zones.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_omicron_sled_config_zone) + .values(some_zones) + .execute_async(&conn) + .await?; + } + } + + // Insert rows for the all the sled configs' zones' nics we found. + { + use nexus_db_schema::schema::inv_omicron_sled_config_zone_nic::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut zone_nics = omicron_sled_config_zone_nics.into_iter(); + loop { + let some_zone_nics = + zone_nics.by_ref().take(batch_size).collect::>(); + if some_zone_nics.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_omicron_sled_config_zone_nic) + .values(some_zone_nics) + .execute_async(&conn) + .await?; + } + } + + // Insert rows for all the sled config reconciler disk results + { + use nexus_db_schema::schema::inv_last_reconciliation_disk_result::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut disk_results = reconciler_disk_results.into_iter(); + loop { + let some_disk_results = + disk_results.by_ref().take(batch_size).collect::>(); + if some_disk_results.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_last_reconciliation_disk_result) + .values(some_disk_results) + .execute_async(&conn) + .await?; + } + } + + // Insert rows for all the sled config reconciler dataset results + { + use nexus_db_schema::schema::inv_last_reconciliation_dataset_result::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut dataset_results = reconciler_dataset_results.into_iter(); + loop { + let some_dataset_results = + dataset_results.by_ref().take(batch_size).collect::>(); + if some_dataset_results.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_last_reconciliation_dataset_result) + .values(some_dataset_results) + .execute_async(&conn) + .await?; + } + } + + // Insert rows for all the sled config reconciler zone results + { + use nexus_db_schema::schema::inv_last_reconciliation_zone_result::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut zone_results = reconciler_zone_results.into_iter(); + loop { + let some_zone_results = + zone_results.by_ref().take(batch_size).collect::>(); + if some_zone_results.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_last_reconciliation_zone_result) + .values(some_zone_results) + .execute_async(&conn) + .await?; + } + } + // Insert rows for the sled agents that we found. In practice, we'd // expect these to all have baseboards (if using Oxide hardware) or // none have baseboards (if not). @@ -866,6 +1047,13 @@ impl DataStore { let baseboard_id = sled_agent.baseboard_id.as_ref().expect( "already selected only sled agents with baseboards", ); + let ConfigReconcilerFields { + ledgered_sled_config, + last_reconciliation_sled_config, + reconciler_status, + } = config_reconciler_fields_by_sled + .remove(&sled_agent.sled_id) + .expect("all sled IDs should exist"); let selection = nexus_db_schema::schema::hw_baseboard_id::table .select(( db_collection_id @@ -898,6 +1086,18 @@ impl DataStore { sled_agent.reservoir_size, ) .into_sql::(), + ledgered_sled_config + .into_sql::>(), + last_reconciliation_sled_config + .into_sql::>(), + reconciler_status.reconciler_status_kind + .into_sql::(), + reconciler_status.reconciler_status_sled_config + .into_sql::>(), + reconciler_status.reconciler_status_timestamp + .into_sql::>(), + reconciler_status.reconciler_status_duration_secs + .into_sql::>(), )) .filter( baseboard_dsl::part_number @@ -923,6 +1123,12 @@ impl DataStore { sa_dsl::usable_hardware_threads, sa_dsl::usable_physical_ram, sa_dsl::reservoir_size, + sa_dsl::ledgered_sled_config, + sa_dsl::last_reconciliation_sled_config, + sa_dsl::reconciler_status_kind, + sa_dsl::reconciler_status_sled_config, + sa_dsl::reconciler_status_timestamp, + sa_dsl::reconciler_status_duration_secs, )) .execute_async(&conn) .await?; @@ -942,6 +1148,12 @@ impl DataStore { _usable_hardware_threads, _usable_physical_ram, _reservoir_size, + _ledgered_sled_config, + _last_reconciliation_sled_config, + _reconciler_status_kind, + _reconciler_status_sled_config, + _reconciler_status_timestamp, + _reconciler_status_duration_secs, ) = sa_dsl::inv_sled_agent::all_columns(); } @@ -1363,6 +1575,10 @@ impl DataStore { .await? }; + let nsled_agent_zones = 0; + let nzones = 0; + let nnics = 0; + /* // Remove rows associated with Omicron zones let nsled_agent_zones = { use nexus_db_schema::schema::inv_sled_omicron_zones::dsl; @@ -1390,6 +1606,7 @@ impl DataStore { .execute_async(&conn) .await? }; + */ let nzpools = { use nexus_db_schema::schema::inv_zpool::dsl; @@ -2409,6 +2626,213 @@ impl DataStore { } } +#[derive(Debug)] +struct ConfigReconcilerFields { + ledgered_sled_config: Option, + last_reconciliation_sled_config: Option, + reconciler_status: InvConfigReconcilerStatus, +} + +// Helper to build the sets of rows for all the `OmicronSledConfig`s and +// per-sled config reconciler status rows for an inventory collection. +#[derive(Debug, Default)] +struct ConfigReconcilerRows { + sled_configs: Vec, + disks: Vec, + datasets: Vec, + zones: Vec, + zone_nics: Vec, + disk_results: Vec, + dataset_results: Vec, + zone_results: Vec, + config_reconciler_fields_by_sled: + BTreeMap, +} + +impl ConfigReconcilerRows { + fn new( + collection_id: CollectionUuid, + collection: &Collection, + ) -> anyhow::Result { + let mut slf = Self::default(); + for (sled_id, sled_agent) in &collection.sled_agents { + slf.accumulate(collection_id, *sled_id, sled_agent)?; + } + Ok(slf) + } + + fn accumulate( + &mut self, + collection_id: CollectionUuid, + sled_id: SledUuid, + sled_agent: &SledAgent, + ) -> anyhow::Result<()> { + let mut ledgered_sled_config = None; + if let Some(config) = &sled_agent.ledgered_sled_config { + ledgered_sled_config = + Some(self.accumulate_config(collection_id, sled_id, config)?); + } + + let mut last_reconciliation_sled_config = None; + if let Some(last_reconciliation) = &sled_agent.last_reconciliation { + // If this config exactly matches the ledgered sled config, we can + // reuse the foreign key; otherwise, accumulate the new one. + if Some(&last_reconciliation.last_reconciled_config) + == sled_agent.ledgered_sled_config.as_ref() + { + last_reconciliation_sled_config = ledgered_sled_config; + } else { + last_reconciliation_sled_config = + Some(self.accumulate_config( + collection_id, + sled_id, + &last_reconciliation.last_reconciled_config, + )?); + } + + self.disk_results.extend( + last_reconciliation.external_disks.iter().map( + |(disk_id, result)| { + InvLastReconciliationDiskResult::new( + collection_id, + sled_id, + *disk_id, + result.clone(), + ) + }, + ), + ); + self.dataset_results.extend( + last_reconciliation.datasets.iter().map( + |(dataset_id, result)| { + InvLastReconciliationDatasetResult::new( + collection_id, + sled_id, + *dataset_id, + result.clone(), + ) + }, + ), + ); + self.zone_results.extend(last_reconciliation.zones.iter().map( + |(zone_id, result)| { + InvLastReconciliationZoneResult::new( + collection_id, + sled_id, + *zone_id, + result.clone(), + ) + }, + )); + } + + let reconciler_status = match &sled_agent.reconciler_status { + ConfigReconcilerInventoryStatus::NotYetRun => { + InvConfigReconcilerStatus { + reconciler_status_kind: + InvConfigReconcilerStatusKind::NotYetRun, + reconciler_status_sled_config: None, + reconciler_status_timestamp: None, + reconciler_status_duration_secs: None, + } + } + ConfigReconcilerInventoryStatus::Running { + config, + started_at, + running_for, + } => { + // If this config exactly matches the ledgered or + // most-recently-reconciled configs, we can reuse those IDs. + // Otherwise, accumulate a new one. + let reconciler_status_sled_config = if Some(config) + == sled_agent.ledgered_sled_config.as_ref() + { + ledgered_sled_config + } else if Some(config) + == sled_agent + .last_reconciliation + .as_ref() + .map(|lr| &lr.last_reconciled_config) + { + last_reconciliation_sled_config + } else { + Some(self.accumulate_config( + collection_id, + sled_id, + config, + )?) + }; + InvConfigReconcilerStatus { + reconciler_status_kind: + InvConfigReconcilerStatusKind::Running, + reconciler_status_sled_config, + reconciler_status_timestamp: Some(*started_at), + reconciler_status_duration_secs: Some( + running_for.as_secs_f64(), + ), + } + } + ConfigReconcilerInventoryStatus::Idle { completed_at, ran_for } => { + InvConfigReconcilerStatus { + reconciler_status_kind: InvConfigReconcilerStatusKind::Idle, + reconciler_status_sled_config: None, + reconciler_status_timestamp: Some(*completed_at), + reconciler_status_duration_secs: Some( + ran_for.as_secs_f64(), + ), + } + } + }; + + self.config_reconciler_fields_by_sled.insert( + sled_id, + ConfigReconcilerFields { + ledgered_sled_config, + last_reconciliation_sled_config, + reconciler_status, + }, + ); + + Ok(()) + } + + fn accumulate_config( + &mut self, + collection_id: CollectionUuid, + sled_id: SledUuid, + config: &OmicronSledConfig, + ) -> anyhow::Result { + let sled_config_id = Uuid::new_v4(); + + self.sled_configs.push(InvOmicronSledConfig::new( + collection_id, + sled_id, + sled_config_id, + config, + )); + self.disks.extend(config.disks.iter().map(|disk| { + InvOmicronSledConfigDisk::new(sled_config_id, sled_id, disk.clone()) + })); + self.datasets.extend(config.datasets.iter().map(|dataset| { + InvOmicronSledConfigDataset::new(sled_config_id, sled_id, dataset) + })); + for zone in &config.zones { + self.zones.push(InvOmicronSledConfigZone::new( + sled_config_id, + sled_id, + zone, + )?); + if let Some(nic) = + InvOmicronSledConfigZoneNic::new(sled_config_id, zone)? + { + self.zone_nics.push(nic); + } + } + + Ok(sled_config_id) + } +} + /// Extra interfaces that are not intended (and potentially unsafe) for use in /// Nexus, but useful for testing and `omdb` pub trait DataStoreInventoryTest: Send + Sync { From d409d4ac29b109b1cf25535b9a644e4b2ba658c6 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 19 May 2025 13:42:12 -0400 Subject: [PATCH 04/22] continued wip: reading collections back out --- nexus/db-model/src/inventory.rs | 144 +++- .../db-queries/src/db/datastore/inventory.rs | 634 +++++++++++++----- nexus/db-schema/src/schema.rs | 4 +- nexus/inventory/src/examples.rs | 114 +++- schema/crdb/dbinit.sql | 40 +- 5 files changed, 730 insertions(+), 206 deletions(-) diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 601a96ebd87..cb886e3a183 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -35,6 +35,7 @@ use nexus_db_schema::schema::{ inv_service_processor, inv_sled_agent, inv_zpool, sw_caboose, sw_root_of_trust_page, }; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::{ ConfigReconcilerInventoryResult, OmicronSledConfig, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneImageSource, OmicronZoneType, @@ -45,6 +46,8 @@ use nexus_types::inventory::{ }; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::disk::DatasetConfig; +use omicron_common::disk::DatasetName; +use omicron_common::disk::DiskIdentity; use omicron_common::disk::OmicronPhysicalDiskConfig; use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::DatasetKind; @@ -60,6 +63,7 @@ use omicron_uuid_kinds::{CollectionKind, OmicronZoneKind}; use omicron_uuid_kinds::{CollectionUuid, OmicronZoneUuid}; use std::collections::BTreeSet; use std::net::{IpAddr, SocketAddrV6}; +use std::time::Duration; use thiserror::Error; use uuid::Uuid; @@ -837,6 +841,62 @@ pub struct InvConfigReconcilerStatus { pub reconciler_status_duration_secs: Option, } +impl InvConfigReconcilerStatus { + /// Convert `self` to a [`ConfigReconcilerInventoryStatus`]. + /// + /// `get_config` should perform the lookup of a serialized + /// `OmicronSledConfig` given its ID. It will be called only if `self.kind` + /// is `InvConfigReconcilerStatusKind::Running`; the other variants do not + /// carry a sled config ID. + pub fn to_status( + &self, + get_config: F, + ) -> anyhow::Result + where + F: FnOnce(&Uuid) -> Option, + { + let status = match self.reconciler_status_kind { + InvConfigReconcilerStatusKind::NotYetRun => { + ConfigReconcilerInventoryStatus::NotYetRun + } + InvConfigReconcilerStatusKind::Running => { + let config_id = self.reconciler_status_sled_config.context( + "missing reconciler status sled config for kind 'running'", + )?; + let config = get_config(&config_id) + .context("missing sled config we should have fetched")?; + ConfigReconcilerInventoryStatus::Running { + config, + started_at: self.reconciler_status_timestamp.context( + "missing reconciler status timestamp \ + for kind 'running'", + )?, + running_for: Duration::from_secs_f64( + self.reconciler_status_duration_secs.context( + "missing reconciler status duration \ + for kind 'running'", + )?, + ), + } + } + InvConfigReconcilerStatusKind::Idle => { + ConfigReconcilerInventoryStatus::Idle { + completed_at: self.reconciler_status_timestamp.context( + "missing reconciler status timestamp for kind 'idle'", + )?, + ran_for: Duration::from_secs_f64( + self.reconciler_status_duration_secs.context( + "missing reconciler status duration \ + for kind 'idle'", + )?, + ), + } + } + }; + Ok(status) + } +} + // See [`nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus`]. impl_enum_type!( InvConfigReconcilerStatusKindEnum: @@ -932,6 +992,15 @@ impl InvLastReconciliationDiskResult { } } +impl From for ConfigReconcilerInventoryResult { + fn from(result: InvLastReconciliationDiskResult) -> Self { + match result.error_message { + None => Self::Ok, + Some(message) => Self::Err { message }, + } + } +} + #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_last_reconciliation_dataset_result)] pub struct InvLastReconciliationDatasetResult { @@ -961,6 +1030,17 @@ impl InvLastReconciliationDatasetResult { } } +impl From + for ConfigReconcilerInventoryResult +{ + fn from(result: InvLastReconciliationDatasetResult) -> Self { + match result.error_message { + None => Self::Ok, + Some(message) => Self::Err { message }, + } + } +} + #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_last_reconciliation_zone_result)] pub struct InvLastReconciliationZoneResult { @@ -990,6 +1070,15 @@ impl InvLastReconciliationZoneResult { } } +impl From for ConfigReconcilerInventoryResult { + fn from(result: InvLastReconciliationZoneResult) -> Self { + match result.error_message { + None => Self::Ok, + Some(message) => Self::Err { message }, + } + } +} + /// See [`nexus_types::inventory::PhysicalDisk`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_physical_disk)] @@ -1314,7 +1403,6 @@ impl From for nexus_types::inventory::Dataset { #[diesel(table_name = inv_omicron_sled_config)] pub struct InvOmicronSledConfig { pub inv_collection_id: DbTypedUuid, - pub sled_id: DbTypedUuid, pub id: Uuid, pub generation: Generation, pub remove_mupdate_override: Option>, @@ -1323,13 +1411,11 @@ pub struct InvOmicronSledConfig { impl InvOmicronSledConfig { pub fn new( inv_collection_id: CollectionUuid, - sled_id: SledUuid, id: Uuid, config: &OmicronSledConfig, ) -> Self { Self { inv_collection_id: inv_collection_id.into(), - sled_id: sled_id.into(), id, generation: Generation(config.generation), remove_mupdate_override: config @@ -1425,7 +1511,6 @@ impl From for ZoneType { #[diesel(table_name = inv_omicron_sled_config_disk)] pub struct InvOmicronSledConfigDisk { pub sled_config_id: Uuid, - pub sled_id: DbTypedUuid, pub id: DbTypedUuid, pub vendor: String, @@ -1438,12 +1523,10 @@ pub struct InvOmicronSledConfigDisk { impl InvOmicronSledConfigDisk { pub fn new( sled_config_id: Uuid, - sled_id: SledUuid, disk_config: OmicronPhysicalDiskConfig, ) -> Self { Self { sled_config_id, - sled_id: sled_id.into(), id: disk_config.id.into(), vendor: disk_config.identity.vendor, serial: disk_config.identity.serial, @@ -1453,12 +1536,25 @@ impl InvOmicronSledConfigDisk { } } +impl From for OmicronPhysicalDiskConfig { + fn from(disk: InvOmicronSledConfigDisk) -> Self { + Self { + identity: DiskIdentity { + vendor: disk.vendor, + serial: disk.serial, + model: disk.model, + }, + id: disk.id.into(), + pool_id: disk.pool_id.into(), + } + } +} + /// See [`omicron_common::disk::DatasetConfig`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_sled_config_dataset)] pub struct InvOmicronSledConfigDataset { pub sled_config_id: Uuid, - pub sled_id: DbTypedUuid, pub id: DbTypedUuid, pub pool_id: DbTypedUuid, @@ -1471,14 +1567,9 @@ pub struct InvOmicronSledConfigDataset { } impl InvOmicronSledConfigDataset { - pub fn new( - sled_config_id: Uuid, - sled_id: SledUuid, - dataset_config: &DatasetConfig, - ) -> Self { + pub fn new(sled_config_id: Uuid, dataset_config: &DatasetConfig) -> Self { Self { sled_config_id, - sled_id: sled_id.into(), id: dataset_config.id.into(), pool_id: dataset_config.name.pool().id().into(), kind: dataset_config.name.kind().into(), @@ -1490,12 +1581,35 @@ impl InvOmicronSledConfigDataset { } } +impl TryFrom for DatasetConfig { + type Error = anyhow::Error; + + fn try_from( + dataset: InvOmicronSledConfigDataset, + ) -> Result { + let pool = omicron_common::zpool_name::ZpoolName::new_external( + dataset.pool_id.into(), + ); + let kind = + crate::DatasetKind::try_into_api(dataset.kind, dataset.zone_name)?; + + Ok(Self { + id: dataset.id.into(), + name: DatasetName::new(pool, kind), + inner: omicron_common::disk::SharedDatasetConfig { + quota: dataset.quota.map(|b| b.into()), + reservation: dataset.reservation.map(|b| b.into()), + compression: dataset.compression.parse()?, + }, + }) + } +} + /// See [`nexus_sled_agent_shared::inventory::OmicronZoneConfig`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_sled_config_zone)] pub struct InvOmicronSledConfigZone { pub sled_config_id: Uuid, - pub sled_id: DbTypedUuid, pub id: DbTypedUuid, pub zone_type: ZoneType, pub primary_service_ip: ipv6::Ipv6Addr, @@ -1520,7 +1634,6 @@ pub struct InvOmicronSledConfigZone { impl InvOmicronSledConfigZone { pub fn new( sled_config_id: Uuid, - sled_id: SledUuid, zone: &OmicronZoneConfig, ) -> Result { // Create a dummy record to start, then fill in the rest @@ -1529,7 +1642,6 @@ impl InvOmicronSledConfigZone { // Fill in the known fields that don't require inspecting // `zone.zone_type` sled_config_id, - sled_id: sled_id.into(), id: zone.id.into(), filesystem_pool: zone .filesystem_pool diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index b4e33cb2e51..862d92d40c4 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -25,6 +25,7 @@ use diesel::expression::SelectableHelper; use diesel::sql_types::Nullable; use futures::FutureExt; use futures::future::BoxFuture; +use id_map::IdMap; use nexus_db_errors::ErrorHandler; use nexus_db_errors::public_error_from_diesel; use nexus_db_errors::public_error_from_diesel_lookup; @@ -70,6 +71,8 @@ use nexus_db_schema::enums::SpTypeEnum; use nexus_db_schema::enums::{ CabooseWhichEnum, InvConfigReconcilerStatusKindEnum, }; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_types::inventory::BaseboardId; @@ -82,7 +85,10 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -173,34 +179,6 @@ impl DataStore { } } - /* - // Pull Omicron zone-related metadata out of all sled agents. - // - // TODO: InvSledOmicronZones is a vestigial table kept for backwards - // compatibility -- the only unique data within it (the generation - // number) can be moved into `InvSledAgent` in the future. See - // oxidecomputer/omicron#6770. - let sled_omicron_zones = collection - .sled_agents - .values() - .map(|sled_agent| { - InvSledOmicronZones::new(collection_id, sled_agent) - }) - .collect::>(); - - // Pull Omicron zones out of all sled agents. - let omicron_zones: Vec<_> = collection - .sled_agents - .iter() - .flat_map(|(sled_id, sled_agent)| { - sled_agent.omicron_zones.zones.iter().map(|zone| { - InvOmicronZone::new(collection_id, *sled_id, zone) - .map_err(|e| Error::internal_error(&e.to_string())) - }) - }) - .collect::, _>>()?; - */ - // Pull disks out of all sled agents let disks: Vec<_> = collection .sled_agents @@ -2336,101 +2314,115 @@ impl DataStore { ); } - /* - // Now read the Omicron zones. + // Now read the `OmicronSledConfig`s. // - // In the first pass, we'll load the "inv_sled_omicron_zones" records. - // There's one of these per sled. It does not contain the actual list - // of zones -- basically just collection metadata and the generation - // number. We'll assemble these directly into the data structure we're - // trying to build, which maps sled ids to objects describing the zones - // found on each sled. - let mut omicron_zones: BTreeMap = { - use nexus_db_schema::schema::inv_sled_omicron_zones::dsl; + // There are 0-3 sled configs per sled agent (ledgered sled + // config, last reconciled config, and actively-being-reconciled + // config). Each of these is keyed by a database-only ID; these IDs are + // referenced by `inv_sled_agent`. Before we read `inv_sled_agent`, + // build up the map of all the sled configs it might reference. + // + // In this first pass, we'll load the `inv_omicron_sled_config` records. + // It does not contain the actual disk/dataset/zone configs; we'll start + // with empty sets and build those up by reading additional tables + // below. + let mut omicron_sled_configs: BTreeMap = { + use nexus_db_schema::schema::inv_omicron_sled_config::dsl; - let mut zones = BTreeMap::new(); + let mut configs = BTreeMap::new(); let mut paginator = Paginator::new(batch_size); while let Some(p) = paginator.next() { let batch = paginated( - dsl::inv_sled_omicron_zones, - dsl::sled_id, + dsl::inv_omicron_sled_config, + dsl::id, &p.current_pagparams(), ) .filter(dsl::inv_collection_id.eq(db_id)) - .select(InvSledOmicronZones::as_select()) + .select(InvOmicronSledConfig::as_select()) .load_async(&*conn) .await .map_err(|e| { public_error_from_diesel(e, ErrorHandler::Server) })?; - paginator = p.found_batch(&batch, &|row| row.sled_id); - zones.extend(batch.into_iter().map(|sled_zones_config| { + paginator = p.found_batch(&batch, &|row| row.id); + configs.extend(batch.into_iter().map(|sled_config| { ( - sled_zones_config.sled_id.into(), - OmicronZonesConfig { - generation: sled_zones_config.generation.into(), - zones: Vec::new(), + sled_config.id, + OmicronSledConfig { + generation: sled_config.generation.into(), + remove_mupdate_override: sled_config + .remove_mupdate_override + .map(From::from), + disks: IdMap::default(), + datasets: IdMap::default(), + zones: IdMap::default(), }, ) })) } - zones + configs }; + // Build the set of this collection's sled IDs (for filtering the batch + // queries below). + let omicron_sled_config_ids = + omicron_sled_configs.keys().copied().collect::>(); + // Assemble a mutable map of all the NICs found, by NIC id. As we // match these up with the corresponding zone below, we'll remove items // from this set. That way we can tell if the same NIC was used twice // or not used at all. - let mut omicron_zone_nics: BTreeMap<_, _> = - { - use nexus_db_schema::schema::inv_omicron_zone_nic::dsl; + let mut omicron_zone_nics: BTreeMap<_, _> = { + use nexus_db_schema::schema::inv_omicron_sled_config_zone_nic::dsl; - let mut nics = BTreeMap::new(); + let mut nics = BTreeMap::new(); - let mut paginator = Paginator::new(batch_size); - while let Some(p) = paginator.next() { - let batch = paginated( - dsl::inv_omicron_zone_nic, - dsl::id, - &p.current_pagparams(), - ) - .filter(dsl::inv_collection_id.eq(db_id)) - .select(InvOmicronZoneNic::as_select()) - .load_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - paginator = p.found_batch(&batch, &|row| row.id); - nics.extend(batch.into_iter().map(|found_zone_nic| { + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_omicron_sled_config_zone_nic, + dsl::id, + &p.current_pagparams(), + ) + .filter( + dsl::sled_config_id.eq_any(omicron_sled_config_ids.clone()), + ) + .select(InvOmicronSledConfigZoneNic::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.id); + nics.extend( + batch.into_iter().map(|found_zone_nic| { (found_zone_nic.id, found_zone_nic) - })); - } + }), + ); + } - nics - }; + nics + }; - // Now load the actual list of zones from all sleds. + // Now load the actual list of zones from all configs. let omicron_zones_list = { - use nexus_db_schema::schema::inv_omicron_zone::dsl; + use nexus_db_schema::schema::inv_omicron_sled_config_zone::dsl; let mut zones = Vec::new(); let mut paginator = Paginator::new(batch_size); while let Some(p) = paginator.next() { let mut batch = paginated( - dsl::inv_omicron_zone, + dsl::inv_omicron_sled_config_zone, dsl::id, &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(db_id)) - // It's not strictly necessary to order these by id. Doing so - // ensures a consistent representation for `Collection`, which - // makes testing easier. It's already indexed to do this, too. - .order_by(dsl::id) - .select(InvOmicronZone::as_select()) + .filter( + dsl::sled_config_id.eq_any(omicron_sled_config_ids.clone()), + ) + .select(InvOmicronSledConfigZone::as_select()) .load_async(&*conn) .await .map_err(|e| { @@ -2446,10 +2438,11 @@ impl DataStore { let nic_row = z .nic_id .map(|id| { - // This error means that we found a row in inv_omicron_zone - // that references a NIC by id but there's no corresponding - // row in inv_omicron_zone_nic with that id. This should be - // impossible and reflects either a bug or database + // This error means that we found a row in + // inv_omicron_sled_config_zone that references a NIC by id + // but there's no corresponding row in + // inv_omicron_sled_config_zone with that id. This should + // be impossible and reflects either a bug or database // corruption. omicron_zone_nics.remove(&id).ok_or_else(|| { Error::internal_error(&format!( @@ -2459,15 +2452,16 @@ impl DataStore { }) }) .transpose()?; - let map = - omicron_zones.get_mut(&z.sled_id.into()).ok_or_else(|| { - // This error means that we found a row in inv_omicron_zone - // with no associated record in inv_sled_omicron_zones. - // This should be impossible and reflects either a bug or - // database corruption. + let config = omicron_sled_configs + .get_mut(&z.sled_config_id) + .ok_or_else(|| { + // This error means that we found a row in + // inv_omicron_sled_config_zone with no associated record in + // inv_sled_omicron_config. This should be impossible and + // reflects either a bug or database corruption. Error::internal_error(&format!( - "zone {:?}: unknown sled: {:?}", - z.id, z.sled_id + "zone {:?}: unknown config ID: {:?}", + z.id, z.sled_config_id )) })?; let zone_id = z.id; @@ -2479,7 +2473,7 @@ impl DataStore { .map_err(|e| { Error::internal_error(&format!("{:#}", e.to_string())) })?; - map.zones.push(zone); + config.zones.insert(zone); } bail_unless!( @@ -2487,7 +2481,188 @@ impl DataStore { "found extra Omicron zone NICs: {:?}", omicron_zone_nics.keys() ); - */ + + // Now load the datasets from all configs. + { + use nexus_db_schema::schema::inv_omicron_sled_config_dataset::dsl; + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_omicron_sled_config_dataset, + dsl::id, + &p.current_pagparams(), + ) + .filter( + dsl::sled_config_id.eq_any(omicron_sled_config_ids.clone()), + ) + .select(InvOmicronSledConfigDataset::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.id); + + for row in batch { + let config = omicron_sled_configs + .get_mut(&row.sled_config_id) + .ok_or_else(|| { + Error::internal_error(&format!( + "dataset config {:?}: unknown config ID: {:?}", + row.id, row.sled_config_id + )) + })?; + config.datasets.insert(row.try_into().map_err(|e| { + Error::internal_error(&format!("{e:#}")) + })?); + } + } + } + + // Now load the disks from all configs. + { + use nexus_db_schema::schema::inv_omicron_sled_config_disk::dsl; + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_omicron_sled_config_disk, + dsl::id, + &p.current_pagparams(), + ) + .filter( + dsl::sled_config_id.eq_any(omicron_sled_config_ids.clone()), + ) + .select(InvOmicronSledConfigDisk::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.id); + + for row in batch { + let config = omicron_sled_configs + .get_mut(&row.sled_config_id) + .ok_or_else(|| { + Error::internal_error(&format!( + "disk config {:?}: unknown config ID: {:?}", + row.id, row.sled_config_id + )) + })?; + config.disks.insert(row.into()); + } + } + } + + // Load all the config reconciler disk results; build a map of maps + // keyed by sled ID. + let mut last_reconciliation_disk_results = { + use nexus_db_schema::schema::inv_last_reconciliation_disk_result::dsl; + + let mut results: BTreeMap< + SledUuid, + BTreeMap, + > = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_last_reconciliation_disk_result, + dsl::disk_id, + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(db_id)) + .select(InvLastReconciliationDiskResult::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.disk_id); + + for row in batch { + let sled_map = + results.entry(row.sled_id.into()).or_default(); + sled_map.insert(row.disk_id.into(), row.into()); + } + } + + results + }; + + // Load all the config reconciler dataset results; build a map of maps + // keyed by sled ID. + let mut last_reconciliation_dataset_results = { + use nexus_db_schema::schema::inv_last_reconciliation_dataset_result::dsl; + + let mut results: BTreeMap< + SledUuid, + BTreeMap, + > = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_last_reconciliation_dataset_result, + dsl::dataset_id, + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(db_id)) + .select(InvLastReconciliationDatasetResult::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.dataset_id); + + for row in batch { + let sled_map = + results.entry(row.sled_id.into()).or_default(); + sled_map.insert(row.dataset_id.into(), row.into()); + } + } + + results + }; + + // Load all the config reconciler zone results; build a map of maps + // keyed by sled ID. + let mut last_reconciliation_zone_results = { + use nexus_db_schema::schema::inv_last_reconciliation_zone_result::dsl; + + let mut results: BTreeMap< + SledUuid, + BTreeMap, + > = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_last_reconciliation_zone_result, + dsl::zone_id, + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(db_id)) + .select(InvLastReconciliationZoneResult::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.zone_id); + + for row in batch { + let sled_map = + results.entry(row.sled_id.into()).or_default(); + sled_map.insert(row.zone_id.into(), row.into()); + } + } + + results + }; // Now load the clickhouse keeper cluster memberships let clickhouse_keeper_cluster_membership = { @@ -2537,36 +2712,52 @@ impl DataStore { }) .transpose()?; - /* - // Look up the Omicron zones. - // - // Older versions of Nexus fetched the Omicron zones in a separate - // request from the other sled agent data. The database model stil - // accounts for the possibility that for a given (collection, sled) - // pair, one of those queries succeeded while the other failed. But - // this has since been changed to fetch all the data in a single - // query, which means that newer collections will either have both - // sets of data or neither of them. - // - // If it _is_ the case that one of the pieces of data is missing, - // log that as a warning and drop the sled from the collection. - // This should only happen for old collections, and only in the - // unlikely case that exactly one of the two related requests - // failed. - // - // TODO: Update the database model to reflect the new reality - // (oxidecomputer/omicron#6770). - let Some(omicron_zones) = omicron_zones.remove(&sled_id) else { - warn!( - self.log, - "no sled Omicron zone data present -- assuming that collection was done - by an old Nexus version and dropping sled from it"; - "collection" => %id, - "sled_id" => %sled_id, - ); - continue; - }; - */ + // Convert all the sled config foreign keys back into fully realized + // `OmicronSledConfig`s. We have to clone these: the same + // `OmicronSledConfig` may be referenced up to three times by this + // sled. + let ledgered_sled_config = s + .ledgered_sled_config + .map(|id| { + omicron_sled_configs.get(&id).cloned().ok_or_else(|| { + Error::internal_error( + "missing sled config that we should have fetched", + ) + }) + }) + .transpose()?; + let reconciler_status = s + .reconciler_status + .to_status(|config_id| { + omicron_sled_configs.get(config_id).cloned() + }) + .map_err(|e| Error::internal_error(&format!("{e:#}")))?; + let last_reconciliation = s + .last_reconciliation_sled_config + .map(|id| { + let last_reconciled_config = omicron_sled_configs + .get(&id) + .cloned() + .ok_or_else(|| { + Error::internal_error( + "missing sled config that we \ + should have fetched", + ) + })?; + Ok::<_, Error>(ConfigReconcilerInventory { + last_reconciled_config, + external_disks: last_reconciliation_disk_results + .remove(&sled_id) + .unwrap_or_default(), + datasets: last_reconciliation_dataset_results + .remove(&sled_id) + .unwrap_or_default(), + zones: last_reconciliation_zone_results + .remove(&sled_id) + .unwrap_or_default(), + }) + }) + .transpose()?; let sled_agent = nexus_types::inventory::SledAgent { time_collected: s.time_collected, @@ -2599,14 +2790,31 @@ impl DataStore { .get(sled_id.as_untyped_uuid()) .map(|datasets| datasets.to_vec()) .unwrap_or_default(), - // TODO-john This is all wrong! - ledgered_sled_config: None, - reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, - last_reconciliation: None, + ledgered_sled_config, + reconciler_status, + last_reconciliation, }; sled_agents.insert(sled_id, sled_agent); } + // Check that we consumed all the reconciliation results we found in + // this collection. + bail_unless!( + last_reconciliation_disk_results.is_empty(), + "found extra config reconciliation disk results: {:?}", + last_reconciliation_disk_results.keys() + ); + bail_unless!( + last_reconciliation_dataset_results.is_empty(), + "found extra config reconciliation dataset results: {:?}", + last_reconciliation_dataset_results.keys() + ); + bail_unless!( + last_reconciliation_zone_results.is_empty(), + "found extra config reconciliation zone results: {:?}", + last_reconciliation_zone_results.keys() + ); + Ok(Collection { id, errors, @@ -2670,7 +2878,7 @@ impl ConfigReconcilerRows { let mut ledgered_sled_config = None; if let Some(config) = &sled_agent.ledgered_sled_config { ledgered_sled_config = - Some(self.accumulate_config(collection_id, sled_id, config)?); + Some(self.accumulate_config(collection_id, config)?); } let mut last_reconciliation_sled_config = None; @@ -2685,7 +2893,6 @@ impl ConfigReconcilerRows { last_reconciliation_sled_config = Some(self.accumulate_config( collection_id, - sled_id, &last_reconciliation.last_reconciled_config, )?); } @@ -2756,11 +2963,7 @@ impl ConfigReconcilerRows { { last_reconciliation_sled_config } else { - Some(self.accumulate_config( - collection_id, - sled_id, - config, - )?) + Some(self.accumulate_config(collection_id, config)?) }; InvConfigReconcilerStatus { reconciler_status_kind: @@ -2799,29 +3002,24 @@ impl ConfigReconcilerRows { fn accumulate_config( &mut self, collection_id: CollectionUuid, - sled_id: SledUuid, config: &OmicronSledConfig, ) -> anyhow::Result { let sled_config_id = Uuid::new_v4(); self.sled_configs.push(InvOmicronSledConfig::new( collection_id, - sled_id, sled_config_id, config, )); self.disks.extend(config.disks.iter().map(|disk| { - InvOmicronSledConfigDisk::new(sled_config_id, sled_id, disk.clone()) + InvOmicronSledConfigDisk::new(sled_config_id, disk.clone()) })); self.datasets.extend(config.datasets.iter().map(|dataset| { - InvOmicronSledConfigDataset::new(sled_config_id, sled_id, dataset) + InvOmicronSledConfigDataset::new(sled_config_id, dataset) })); for zone in &config.zones { - self.zones.push(InvOmicronSledConfigZone::new( - sled_config_id, - sled_id, - zone, - )?); + self.zones + .push(InvOmicronSledConfigZone::new(sled_config_id, zone)?); if let Some(nic) = InvOmicronSledConfigZoneNic::new(sled_config_id, zone)? { @@ -2893,15 +3091,23 @@ mod test { use nexus_db_schema::schema; use nexus_inventory::examples::Representative; use nexus_inventory::examples::representative; + use nexus_inventory::now_db_precision; + use nexus_sled_agent_shared::inventory::{ + ConfigReconcilerInventory, ConfigReconcilerInventoryResult, + ConfigReconcilerInventoryStatus, + }; use nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::RotPageWhich; use omicron_common::api::external::Error; use omicron_test_utils::dev; - use omicron_uuid_kinds::CollectionUuid; + use omicron_uuid_kinds::{ + CollectionUuid, DatasetUuid, OmicronZoneUuid, PhysicalDiskUuid, + }; use pretty_assertions::assert_eq; use std::num::NonZeroU32; + use std::time::Duration; struct CollectionCounts { baseboards: usize, @@ -3425,19 +3631,55 @@ mod test { .unwrap(); assert_eq!(0, count); let count = - schema::inv_sled_omicron_zones::dsl::inv_sled_omicron_zones + schema::inv_omicron_sled_config::dsl::inv_omicron_sled_config .select(diesel::dsl::count_star()) .first_async::(&conn) .await .unwrap(); assert_eq!(0, count); - let count = schema::inv_omicron_zone::dsl::inv_omicron_zone + let count = schema::inv_last_reconciliation_disk_result::dsl::inv_last_reconciliation_disk_result + .select(diesel::dsl::count_star()) + .first_async::(&conn) + .await + .unwrap(); + assert_eq!(0, count); + let count = schema::inv_last_reconciliation_dataset_result::dsl::inv_last_reconciliation_dataset_result + .select(diesel::dsl::count_star()) + .first_async::(&conn) + .await + .unwrap(); + assert_eq!(0, count); + let count = schema::inv_last_reconciliation_zone_result::dsl::inv_last_reconciliation_zone_result .select(diesel::dsl::count_star()) .first_async::(&conn) .await .unwrap(); assert_eq!(0, count); - let count = schema::inv_omicron_zone_nic::dsl::inv_omicron_zone_nic + let count = schema::inv_omicron_sled_config::dsl::inv_omicron_sled_config + .select(diesel::dsl::count_star()) + .first_async::(&conn) + .await + .unwrap(); + assert_eq!(0, count); + let count = schema::inv_omicron_sled_config_disk::dsl::inv_omicron_sled_config_disk + .select(diesel::dsl::count_star()) + .first_async::(&conn) + .await + .unwrap(); + assert_eq!(0, count); + let count = schema::inv_omicron_sled_config_dataset::dsl::inv_omicron_sled_config_dataset + .select(diesel::dsl::count_star()) + .first_async::(&conn) + .await + .unwrap(); + assert_eq!(0, count); + let count = schema::inv_omicron_sled_config_zone::dsl::inv_omicron_sled_config_zone + .select(diesel::dsl::count_star()) + .first_async::(&conn) + .await + .unwrap(); + assert_eq!(0, count); + let count = schema::inv_omicron_sled_config_zone_nic::dsl::inv_omicron_sled_config_zone_nic .select(diesel::dsl::count_star()) .first_async::(&conn) .await @@ -3611,4 +3853,96 @@ mod test { db.terminate().await; logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_reconciler_status_fields() { + // Setup + let logctx = dev::test_setup_log("inventory_deletion"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Start with a representative collection. + let Representative { builder, .. } = representative(); + let mut collection = builder.build(); + + // Mutate the sled-agent contents to test variants of the reconciler + // fields: + // + // * try all three `ConfigReconcilerInventoryStatus` variants + // * add a `last_reconciliation` with a mix of success/error results + let mut sled_agents = collection.sled_agents.values_mut(); + let sa1 = sled_agents.next().expect("at least 1 sled agent"); + let sa2 = sled_agents.next().expect("at least 2 sled agents"); + let sa3 = sled_agents.next().expect("at least 3 sled agents"); + + sa1.reconciler_status = ConfigReconcilerInventoryStatus::NotYetRun; + sa1.last_reconciliation = None; + + sa2.reconciler_status = ConfigReconcilerInventoryStatus::Running { + config: sa2.ledgered_sled_config.clone().unwrap(), + started_at: now_db_precision(), + running_for: Duration::from_secs(1), + }; + sa2.last_reconciliation = Some({ + let make_result = |kind, i| { + if i % 2 == 0 { + ConfigReconcilerInventoryResult::Ok + } else { + ConfigReconcilerInventoryResult::Err { + message: format!("fake {kind} error {i}"), + } + } + }; + ConfigReconcilerInventory { + last_reconciled_config: sa2 + .ledgered_sled_config + .clone() + .unwrap(), + external_disks: (0..10) + .map(|i| { + (PhysicalDiskUuid::new_v4(), make_result("disk", i)) + }) + .collect(), + datasets: (0..10) + .map(|i| (DatasetUuid::new_v4(), make_result("dataset", i))) + .collect(), + zones: (0..10) + .map(|i| { + (OmicronZoneUuid::new_v4(), make_result("zone", i)) + }) + .collect(), + } + }); + + sa3.reconciler_status = ConfigReconcilerInventoryStatus::Idle { + completed_at: now_db_precision(), + ran_for: Duration::from_secs(5), + }; + + // Write it to the db; read it back and check it survived. + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect("failed to insert collection"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection.id) + .await + .expect("failed to read collection back"); + assert_eq!(collection, collection_read); + + // Now delete it and ensure we remove everything. + datastore + .inventory_delete_collection(&opctx, collection.id) + .await + .expect("failed to prune collections"); + + // Read all "inv_" tables and ensure that they're empty + check_all_inv_tables(&datastore, AllInvTables::AreEmpty).await.expect( + "All inv_... tables should be deleted alongside collection", + ); + + // Clean up. + db.terminate().await; + logctx.cleanup_successful(); + } } diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index fce13f8a1fd..473b695b2d5 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1678,9 +1678,8 @@ table! { } table! { - inv_omicron_sled_config (inv_collection_id, sled_id, id) { + inv_omicron_sled_config (inv_collection_id, id) { inv_collection_id -> Uuid, - sled_id -> Uuid, id -> Uuid, generation -> Int8, @@ -1690,7 +1689,6 @@ table! { table! { inv_omicron_sled_config_zone (sled_config_id, id) { - sled_id -> Uuid, sled_config_id -> Uuid, id -> Uuid, diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index bac832cc148..69a72445e87 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -5,6 +5,7 @@ //! Example collections used for testing use crate::CollectionBuilder; +use crate::now_db_precision; use clickhouse_admin_types::ClickhouseKeeperClusterMembership; use clickhouse_admin_types::KeeperId; use gateway_client::types::PowerState; @@ -14,6 +15,8 @@ use gateway_client::types::SpComponentCaboose; use gateway_client::types::SpState; use gateway_client::types::SpType; use nexus_sled_agent_shared::inventory::Baseboard; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; @@ -26,11 +29,21 @@ use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::RotPage; use nexus_types::inventory::RotPageWhich; +use nexus_types::inventory::ZpoolName; use omicron_common::api::external::ByteCount; +use omicron_common::disk::DatasetConfig; +use omicron_common::disk::DatasetKind; +use omicron_common::disk::DatasetName; use omicron_common::disk::DiskVariant; +use omicron_common::disk::OmicronPhysicalDiskConfig; +use omicron_common::disk::SharedDatasetConfig; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; use std::sync::Arc; +use std::time::Duration; use strum::IntoEnumIterator; +use uuid::Uuid; /// Returns an example Collection used for testing /// @@ -296,10 +309,9 @@ pub fn representative() -> Representative { let sled16: OmicronZonesConfig = serde_json::from_str(sled16_data).unwrap(); let sled17: OmicronZonesConfig = serde_json::from_str(sled17_data).unwrap(); - // Convert these to `OmicronSledConfig`s. For now we leave the disks and - // datasets blank. This is wrong but not (currently) a problem. If you - // landed here and it is a problem for you now, I apologize. - let sled14 = OmicronSledConfig { + // Convert these to `OmicronSledConfig`s. We'll start with empty disks and + // datasets for now, and add to them below for sled14. + let mut sled14 = OmicronSledConfig { generation: sled14.generation, disks: Default::default(), datasets: Default::default(), @@ -321,6 +333,30 @@ pub fn representative() -> Representative { remove_mupdate_override: None, }; + // Create iterator producing fixed IDs. + let mut disk_id_iter = std::iter::from_fn({ + // "physicaldisk" + let mut value = "70687973-6963-616c-6469-736b00000000" + .parse::() + .unwrap() + .as_u128(); + move || { + value += 1; + Some(PhysicalDiskUuid::from_u128(value)) + } + }); + let mut zpool_id_iter = std::iter::from_fn({ + // "zpool" + let mut value = "7a706f6f-6c00-0000-0000-000000000000" + .parse::() + .unwrap() + .as_u128(); + move || { + value += 1; + Some(ZpoolUuid::from_u128(value)) + } + }); + // Report some sled agents. // // This first one will match "sled1_bb"'s baseboard information. @@ -388,19 +424,41 @@ pub fn representative() -> Representative { slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, ]; - let zpools = vec![InventoryZpool { - id: "283f5475-2606-4e83-b001-9a025dbcb8a0".parse().unwrap(), - total_size: ByteCount::from(4096), - }]; + let mut zpools = Vec::new(); + for disk in &disks { + let pool_id = zpool_id_iter.next().unwrap(); + sled14.disks.insert(OmicronPhysicalDiskConfig { + identity: disk.identity.clone(), + id: disk_id_iter.next().unwrap(), + pool_id, + }); + zpools.push(InventoryZpool { + id: pool_id, + total_size: ByteCount::from(4096), + }); + } + let dataset_name = DatasetName::new( + ZpoolName::new_external(zpools[0].id), + DatasetKind::Debug, + ); let datasets = vec![InventoryDataset { id: Some("afc00483-0d7b-4181-87d5-0def937d3cd7".parse().unwrap()), - name: "mydataset".to_string(), + name: dataset_name.full_name(), available: ByteCount::from(1024), used: ByteCount::from(0), quota: None, reservation: None, compression: "lz4".to_string(), }]; + sled14.datasets.insert(DatasetConfig { + id: datasets[0].id.unwrap(), + name: dataset_name, + inner: SharedDatasetConfig { + compression: datasets[0].compression.parse().unwrap(), + quota: datasets[0].quota, + reservation: datasets[0].reservation, + }, + }); builder .found_sled_inventory( @@ -584,6 +642,40 @@ pub fn sled_agent( datasets: Vec, ledgered_sled_config: Option, ) -> Inventory { + // Assume the `ledgered_sled_config` was reconciled successfully. + let last_reconciliation = ledgered_sled_config.clone().map(|config| { + let external_disks = dbg!(&config) + .disks + .iter() + .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) + .collect(); + let datasets = config + .datasets + .iter() + .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) + .collect(); + let zones = config + .zones + .iter() + .map(|z| (z.id, ConfigReconcilerInventoryResult::Ok)) + .collect(); + ConfigReconcilerInventory { + last_reconciled_config: config, + external_disks, + datasets, + zones, + } + }); + + let reconciler_status = if last_reconciliation.is_some() { + ConfigReconcilerInventoryStatus::Idle { + completed_at: now_db_precision(), + ran_for: Duration::from_secs(5), + } + } else { + ConfigReconcilerInventoryStatus::NotYetRun + }; + Inventory { baseboard, reservoir_size: ByteCount::from(1024), @@ -596,7 +688,7 @@ pub fn sled_agent( zpools, datasets, ledgered_sled_config, - reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, - last_reconciliation: None, + reconciler_status, + last_reconciliation, } } diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 73eb1857794..52ad6ef9276 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3619,29 +3619,29 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( -- Columns making up the status of the config reconciler. reconciler_status_kind inv_config_reconciler_status_kind NOT NULL, -- (foreign key into `inv_omicron_sled_config` table) - -- only present if `inv_config_reconciler_status_kind = 'running'` + -- only present if `reconciler_status_kind = 'running'` reconciler_status_sled_config UUID CHECK ( - (inv_config_reconciler_status_kind = 'running' + (reconciler_status_kind = 'running' AND reconciler_status_sled_config IS NOT NULL) OR - (inv_config_reconciler_status_kind != 'running' + (reconciler_status_kind != 'running' AND reconciler_status_sled_config IS NULL) ), - -- only present if `inv_config_reconciler_status_kind != 'not-yet-run'` + -- only present if `reconciler_status_kind != 'not-yet-run'` reconciler_status_timestamp TIMESTAMPTZ CHECK ( - (inv_config_reconciler_status_kind = 'not-yet-run' - AND reconciler_status_sled_config IS NULL) + (reconciler_status_kind = 'not-yet-run' + AND reconciler_status_timestamp IS NULL) OR - (inv_config_reconciler_status_kind != 'not-yet-run' - AND reconciler_status_sled_config IS NOT NULL) + (reconciler_status_kind != 'not-yet-run' + AND reconciler_status_timestamp IS NOT NULL) ), - -- only present if `inv_config_reconciler_status_kind != 'not-yet-run'` + -- only present if `reconciler_status_kind != 'not-yet-run'` reconciler_status_duration_secs FLOAT CHECK ( - (inv_config_reconciler_status_kind = 'not-yet-run' - AND reconciler_status_sled_config IS NULL) + (reconciler_status_kind = 'not-yet-run' + AND reconciler_status_duration_secs IS NULL) OR - (inv_config_reconciler_status_kind != 'not-yet-run' - AND reconciler_status_sled_config IS NOT NULL) + (reconciler_status_kind != 'not-yet-run' + AND reconciler_status_duration_secs IS NOT NULL) ), PRIMARY KEY (inv_collection_id, sled_id) @@ -3752,10 +3752,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config ( -- (foreign key into `inv_collection` table) inv_collection_id UUID NOT NULL, - -- unique id for this sled (should be foreign keys into `sled` table, though - -- it's conceivable a sled will report an id that we don't know about) - sled_id UUID NOT NULL, - -- ID of this sled config. A given inventory report from a sled agent may -- contain 0-3 sled configs, so we generate these IDs on insertion and -- record them as the foreign keys in `inv_sled_agent`. @@ -3767,7 +3763,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config ( -- remove mupdate override ID, if set remove_mupdate_override UUID, - PRIMARY KEY (inv_collection_id, sled_id, id) + PRIMARY KEY (inv_collection_id, id) ); CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_disk_result ( @@ -3845,10 +3841,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( -- (foreign key into `inv_omicron_sled_config` table) sled_config_id UUID NOT NULL, - -- unique id for this sled (should be foreign keys into `sled` table, though - -- it's conceivable a sled will report an id that we don't know about) - sled_id UUID NOT NULL, - -- unique id for this zone id UUID NOT NULL, zone_type omicron.public.zone_type NOT NULL, @@ -3914,8 +3906,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( CREATE INDEX IF NOT EXISTS inv_omicron_sled_config_zone_nic_id ON omicron.public.inv_omicron_sled_config_zone (nic_id) STORING ( - sled_id, - sled_config_id, primary_service_ip, second_service_ip, snat_ip @@ -3938,7 +3928,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone_nic ( CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_dataset ( -- foreign key into the `inv_omicron_sled_config` table sled_config_id UUID NOT NULL, - sled_id UUID NOT NULL, id UUID NOT NULL, pool_id UUID NOT NULL, @@ -3961,7 +3950,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_dataset ( CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_disk ( -- foreign key into the `inv_omicron_sled_config` table sled_config_id UUID NOT NULL, - sled_id UUID NOT NULL, id UUID NOT NULL, vendor TEXT NOT NULL, From b74d38fca90d3f9523eaba4c7776b36eceb5218b Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 19 May 2025 14:24:46 -0400 Subject: [PATCH 05/22] wip: delete collections --- nexus/db-model/src/inventory.rs | 17 +- .../db-queries/src/db/datastore/inventory.rs | 152 ++++++++++++------ nexus/db-schema/src/schema.rs | 12 +- schema/crdb/dbinit.sql | 19 ++- 4 files changed, 142 insertions(+), 58 deletions(-) diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index cb886e3a183..7485977fae8 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -1510,6 +1510,7 @@ impl From for ZoneType { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_sled_config_disk)] pub struct InvOmicronSledConfigDisk { + pub inv_collection_id: DbTypedUuid, pub sled_config_id: Uuid, pub id: DbTypedUuid, @@ -1522,10 +1523,12 @@ pub struct InvOmicronSledConfigDisk { impl InvOmicronSledConfigDisk { pub fn new( + inv_collection_id: CollectionUuid, sled_config_id: Uuid, disk_config: OmicronPhysicalDiskConfig, ) -> Self { Self { + inv_collection_id: inv_collection_id.into(), sled_config_id, id: disk_config.id.into(), vendor: disk_config.identity.vendor, @@ -1554,6 +1557,7 @@ impl From for OmicronPhysicalDiskConfig { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_sled_config_dataset)] pub struct InvOmicronSledConfigDataset { + pub inv_collection_id: DbTypedUuid, pub sled_config_id: Uuid, pub id: DbTypedUuid, @@ -1567,8 +1571,13 @@ pub struct InvOmicronSledConfigDataset { } impl InvOmicronSledConfigDataset { - pub fn new(sled_config_id: Uuid, dataset_config: &DatasetConfig) -> Self { + pub fn new( + inv_collection_id: CollectionUuid, + sled_config_id: Uuid, + dataset_config: &DatasetConfig, + ) -> Self { Self { + inv_collection_id: inv_collection_id.into(), sled_config_id, id: dataset_config.id.into(), pool_id: dataset_config.name.pool().id().into(), @@ -1609,6 +1618,7 @@ impl TryFrom for DatasetConfig { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_sled_config_zone)] pub struct InvOmicronSledConfigZone { + pub inv_collection_id: DbTypedUuid, pub sled_config_id: Uuid, pub id: DbTypedUuid, pub zone_type: ZoneType, @@ -1633,6 +1643,7 @@ pub struct InvOmicronSledConfigZone { impl InvOmicronSledConfigZone { pub fn new( + inv_collection_id: CollectionUuid, sled_config_id: Uuid, zone: &OmicronZoneConfig, ) -> Result { @@ -1641,6 +1652,7 @@ impl InvOmicronSledConfigZone { let mut inv_omicron_zone = InvOmicronSledConfigZone { // Fill in the known fields that don't require inspecting // `zone.zone_type` + inv_collection_id: inv_collection_id.into(), sled_config_id, id: zone.id.into(), filesystem_pool: zone @@ -1970,6 +1982,7 @@ impl InvOmicronSledConfigZone { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_sled_config_zone_nic)] pub struct InvOmicronSledConfigZoneNic { + inv_collection_id: DbTypedUuid, sled_config_id: Uuid, pub id: Uuid, name: Name, @@ -1998,6 +2011,7 @@ impl From for OmicronZoneNic { impl InvOmicronSledConfigZoneNic { pub fn new( + inv_collection_id: CollectionUuid, sled_config_id: Uuid, zone: &OmicronZoneConfig, ) -> Result, anyhow::Error> { @@ -2006,6 +2020,7 @@ impl InvOmicronSledConfigZoneNic { }; let nic = OmicronZoneNic::new(zone.id, nic)?; Ok(Some(Self { + inv_collection_id: inv_collection_id.into(), sled_config_id, id: nic.id, name: nic.name, diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 862d92d40c4..e01a6a34884 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -1453,9 +1453,14 @@ impl DataStore { ndatasets, nphysical_disks, nnvme_disk_disk_firmware, - nsled_agent_zones, - nzones, - nnics, + nlast_reconciliation_disk_results, + nlast_reconciliation_dataset_results, + nlast_reconciliation_zone_results, + nomicron_sled_configs, + nomicron_sled_config_disks, + nomicron_sled_config_datasets, + nomicron_sled_config_zones, + nomicron_sled_config_zone_nics, nzpools, nerrors, nclickhouse_keeper_membership, @@ -1553,38 +1558,74 @@ impl DataStore { .await? }; - let nsled_agent_zones = 0; - let nzones = 0; - let nnics = 0; - /* - // Remove rows associated with Omicron zones - let nsled_agent_zones = { - use nexus_db_schema::schema::inv_sled_omicron_zones::dsl; - diesel::delete(dsl::inv_sled_omicron_zones.filter( + // Remove rows associated with the last reconciliation + // result (disks, datasets, and zones). + let nlast_reconciliation_disk_results = { + use nexus_db_schema::schema::inv_last_reconciliation_disk_result::dsl; + diesel::delete(dsl::inv_last_reconciliation_disk_result.filter( dsl::inv_collection_id.eq(db_collection_id), )) .execute_async(&conn) .await? }; - - let nzones = { - use nexus_db_schema::schema::inv_omicron_zone::dsl; - diesel::delete(dsl::inv_omicron_zone.filter( + let nlast_reconciliation_dataset_results = { + use nexus_db_schema::schema::inv_last_reconciliation_dataset_result::dsl; + diesel::delete(dsl::inv_last_reconciliation_dataset_result.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; + let nlast_reconciliation_zone_results = { + use nexus_db_schema::schema::inv_last_reconciliation_zone_result::dsl; + diesel::delete(dsl::inv_last_reconciliation_zone_result.filter( dsl::inv_collection_id.eq(db_collection_id), )) .execute_async(&conn) .await? }; - let nnics = { - use nexus_db_schema::schema::inv_omicron_zone_nic::dsl; - diesel::delete(dsl::inv_omicron_zone_nic.filter( + // Remove rows associated with `OmicronSledConfig`s. + let nomicron_sled_configs = { + use nexus_db_schema::schema::inv_omicron_sled_config::dsl; + diesel::delete(dsl::inv_omicron_sled_config.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; + let nomicron_sled_config_disks = { + use nexus_db_schema::schema::inv_omicron_sled_config_disk::dsl; + diesel::delete(dsl::inv_omicron_sled_config_disk.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; + let nomicron_sled_config_datasets = { + use nexus_db_schema::schema::inv_omicron_sled_config_dataset::dsl; + diesel::delete(dsl::inv_omicron_sled_config_dataset.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; + let nomicron_sled_config_zones = { + use nexus_db_schema::schema::inv_omicron_sled_config_zone::dsl; + diesel::delete(dsl::inv_omicron_sled_config_zone.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; + let nomicron_sled_config_zone_nics = { + use nexus_db_schema::schema::inv_omicron_sled_config_zone_nic::dsl; + diesel::delete(dsl::inv_omicron_sled_config_zone_nic.filter( dsl::inv_collection_id.eq(db_collection_id), )) .execute_async(&conn) .await? }; - */ let nzpools = { use nexus_db_schema::schema::inv_zpool::dsl; @@ -1627,9 +1668,14 @@ impl DataStore { ndatasets, nphysical_disks, nnvme_disk_firwmare, - nsled_agent_zones, - nzones, - nnics, + nlast_reconciliation_disk_results, + nlast_reconciliation_dataset_results, + nlast_reconciliation_zone_results, + nomicron_sled_configs, + nomicron_sled_config_disks, + nomicron_sled_config_datasets, + nomicron_sled_config_zones, + nomicron_sled_config_zone_nics, nzpools, nerrors, nclickhouse_keeper_membership, @@ -1651,9 +1697,17 @@ impl DataStore { "ndatasets" => ndatasets, "nphysical_disks" => nphysical_disks, "nnvme_disk_firmware" => nnvme_disk_disk_firmware, - "nsled_agent_zones" => nsled_agent_zones, - "nzones" => nzones, - "nnics" => nnics, + "nlast_reconciliation_disk_results" => + nlast_reconciliation_disk_results, + "nlast_reconciliation_dataset_results" => + nlast_reconciliation_dataset_results, + "nlast_reconciliation_zone_results" => + nlast_reconciliation_zone_results, + "nomicron_sled_configs" => nomicron_sled_configs, + "nomicron_sled_config_disks" => nomicron_sled_config_disks, + "nomicron_sled_config_datasets" => nomicron_sled_config_datasets, + "nomicron_sled_config_zones" => nomicron_sled_config_zones, + "nomicron_sled_config_zone_nics" => nomicron_sled_config_zone_nics, "nzpools" => nzpools, "nerrors" => nerrors, "nclickhouse_keeper_membership" => nclickhouse_keeper_membership @@ -2365,11 +2419,6 @@ impl DataStore { configs }; - // Build the set of this collection's sled IDs (for filtering the batch - // queries below). - let omicron_sled_config_ids = - omicron_sled_configs.keys().copied().collect::>(); - // Assemble a mutable map of all the NICs found, by NIC id. As we // match these up with the corresponding zone below, we'll remove items // from this set. That way we can tell if the same NIC was used twice @@ -2386,9 +2435,7 @@ impl DataStore { dsl::id, &p.current_pagparams(), ) - .filter( - dsl::sled_config_id.eq_any(omicron_sled_config_ids.clone()), - ) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvOmicronSledConfigZoneNic::as_select()) .load_async(&*conn) .await @@ -2419,9 +2466,7 @@ impl DataStore { dsl::id, &p.current_pagparams(), ) - .filter( - dsl::sled_config_id.eq_any(omicron_sled_config_ids.clone()), - ) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvOmicronSledConfigZone::as_select()) .load_async(&*conn) .await @@ -2493,9 +2538,7 @@ impl DataStore { dsl::id, &p.current_pagparams(), ) - .filter( - dsl::sled_config_id.eq_any(omicron_sled_config_ids.clone()), - ) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvOmicronSledConfigDataset::as_select()) .load_async(&*conn) .await @@ -2531,9 +2574,7 @@ impl DataStore { dsl::id, &p.current_pagparams(), ) - .filter( - dsl::sled_config_id.eq_any(omicron_sled_config_ids.clone()), - ) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvOmicronSledConfigDisk::as_select()) .load_async(&*conn) .await @@ -3012,17 +3053,30 @@ impl ConfigReconcilerRows { config, )); self.disks.extend(config.disks.iter().map(|disk| { - InvOmicronSledConfigDisk::new(sled_config_id, disk.clone()) + InvOmicronSledConfigDisk::new( + collection_id, + sled_config_id, + disk.clone(), + ) })); self.datasets.extend(config.datasets.iter().map(|dataset| { - InvOmicronSledConfigDataset::new(sled_config_id, dataset) + InvOmicronSledConfigDataset::new( + collection_id, + sled_config_id, + dataset, + ) })); for zone in &config.zones { - self.zones - .push(InvOmicronSledConfigZone::new(sled_config_id, zone)?); - if let Some(nic) = - InvOmicronSledConfigZoneNic::new(sled_config_id, zone)? - { + self.zones.push(InvOmicronSledConfigZone::new( + collection_id, + sled_config_id, + zone, + )?); + if let Some(nic) = InvOmicronSledConfigZoneNic::new( + collection_id, + sled_config_id, + zone, + )? { self.zone_nics.push(nic); } } diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 473b695b2d5..f635a2f027f 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1688,7 +1688,8 @@ table! { } table! { - inv_omicron_sled_config_zone (sled_config_id, id) { + inv_omicron_sled_config_zone (inv_collection_id, sled_config_id, id) { + inv_collection_id -> Uuid, sled_config_id -> Uuid, id -> Uuid, @@ -1715,7 +1716,8 @@ table! { } table! { - inv_omicron_sled_config_zone_nic (sled_config_id, id) { + inv_omicron_sled_config_zone_nic (inv_collection_id, sled_config_id, id) { + inv_collection_id -> Uuid, sled_config_id -> Uuid, id -> Uuid, name -> Text, @@ -1729,7 +1731,8 @@ table! { } table! { - inv_omicron_sled_config_dataset (sled_config_id, id) { + inv_omicron_sled_config_dataset (inv_collection_id, sled_config_id, id) { + inv_collection_id -> Uuid, sled_config_id -> Uuid, sled_id -> Uuid, id -> Uuid, @@ -1745,7 +1748,8 @@ table! { } table! { - inv_omicron_sled_config_disk (sled_config_id, id) { + inv_omicron_sled_config_disk (inv_collection_id, sled_config_id, id) { + inv_collection_id -> Uuid, sled_config_id -> Uuid, sled_id -> Uuid, id -> Uuid, diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 52ad6ef9276..9910418465c 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3838,6 +3838,8 @@ CREATE TYPE IF NOT EXISTS omicron.public.zone_type AS ENUM ( -- `zones` portion of an `OmicronSledConfig` observed from sled-agent CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( -- where this observation came from + inv_collection_id UUID NOT NULL, + -- (foreign key into `inv_omicron_sled_config` table) sled_config_id UUID NOT NULL, @@ -3900,7 +3902,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( -- Eventually, that nullability should be removed. filesystem_pool UUID, - PRIMARY KEY (sled_config_id, id) + PRIMARY KEY (inv_collection_id, sled_config_id, id) ); CREATE INDEX IF NOT EXISTS inv_omicron_sled_config_zone_nic_id @@ -3912,6 +3914,9 @@ CREATE INDEX IF NOT EXISTS inv_omicron_sled_config_zone_nic_id ); CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone_nic ( + -- where this observation came from + inv_collection_id UUID NOT NULL, + sled_config_id UUID NOT NULL, id UUID NOT NULL, name TEXT NOT NULL, @@ -3922,10 +3927,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone_nic ( is_primary BOOLEAN NOT NULL, slot INT2 NOT NULL, - PRIMARY KEY (sled_config_id, id) + PRIMARY KEY (inv_collection_id, sled_config_id, id) ); CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_dataset ( + -- where this observation came from + inv_collection_id UUID NOT NULL, + -- foreign key into the `inv_omicron_sled_config` table sled_config_id UUID NOT NULL, id UUID NOT NULL, @@ -3944,10 +3952,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_dataset ( (kind = 'zone' AND zone_name IS NOT NULL) ), - PRIMARY KEY (sled_config_id, id) + PRIMARY KEY (inv_collection_id, sled_config_id, id) ); CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_disk ( + -- where this observation came from + inv_collection_id UUID NOT NULL, + -- foreign key into the `inv_omicron_sled_config` table sled_config_id UUID NOT NULL, id UUID NOT NULL, @@ -3958,7 +3969,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_disk ( pool_id UUID NOT NULL, - PRIMARY KEY (sled_config_id, id) + PRIMARY KEY (inv_collection_id, sled_config_id, id) ); CREATE TABLE IF NOT EXISTS omicron.public.inv_clickhouse_keeper_membership ( From 2e9555a20249734d646563cfdeb6276954d2b3b2 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 19 May 2025 15:33:24 -0400 Subject: [PATCH 06/22] persist OmicronZoneConfig::image_source --- id-map/src/lib.rs | 1 + nexus/db-model/src/inventory.rs | 47 +++++++++++- .../db-queries/src/db/datastore/inventory.rs | 73 +++++++++++++++++-- nexus/db-schema/src/enums.rs | 1 + nexus/db-schema/src/schema.rs | 3 + schema/crdb/dbinit.sql | 31 ++++++-- 6 files changed, 141 insertions(+), 15 deletions(-) diff --git a/id-map/src/lib.rs b/id-map/src/lib.rs index c8e6699717e..20b37aa776a 100644 --- a/id-map/src/lib.rs +++ b/id-map/src/lib.rs @@ -289,6 +289,7 @@ impl Diffable for IdMap { /// Wrapper around a `&'a mut T` that panics when dropped if the borrowed /// value's `id()` has changed since the wrapper was created. +#[derive(Debug)] pub struct RefMut<'a, T: IdMappable> { original_id: T::Id, // Always `Some(_)` until the `RefMut` is consumed by `into_ref()`. diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 7485977fae8..1a9de2994a6 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -4,6 +4,7 @@ //! Types for representing the hardware/software inventory in the database +use crate::ArtifactHash; use crate::Generation; use crate::PhysicalDiskKind; use crate::omicron_zone_config::{self, OmicronZoneNic}; @@ -1614,6 +1615,17 @@ impl TryFrom for DatasetConfig { } } +impl_enum_type!( + InvZoneImageSourceEnum: + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] + pub enum InvZoneImageSource; + + // Enum values + InstallDataset => b"install_dataset" + Artifact => b"artifact" +); + /// See [`nexus_sled_agent_shared::inventory::OmicronZoneConfig`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_sled_config_zone)] @@ -1639,6 +1651,8 @@ pub struct InvOmicronSledConfigZone { pub snat_first_port: Option, pub snat_last_port: Option, pub filesystem_pool: Option>, + pub image_source: InvZoneImageSource, + pub image_artifact_sha256: Option, } impl InvOmicronSledConfigZone { @@ -1647,6 +1661,15 @@ impl InvOmicronSledConfigZone { sled_config_id: Uuid, zone: &OmicronZoneConfig, ) -> Result { + let (image_source, image_artifact_sha256) = match &zone.image_source { + OmicronZoneImageSource::InstallDataset => { + (InvZoneImageSource::InstallDataset, None) + } + OmicronZoneImageSource::Artifact { hash } => { + (InvZoneImageSource::Artifact, Some(ArtifactHash(*hash))) + } + }; + // Create a dummy record to start, then fill in the rest // according to the zone type let mut inv_omicron_zone = InvOmicronSledConfigZone { @@ -1681,6 +1704,8 @@ impl InvOmicronSledConfigZone { snat_ip: None, snat_first_port: None, snat_last_port: None, + image_source, + image_artifact_sha256, }; match &zone.zone_type { @@ -1968,13 +1993,31 @@ impl InvOmicronSledConfigZone { } }; + let image_source = match (self.image_source, self.image_artifact_sha256) + { + (InvZoneImageSource::InstallDataset, None) => { + OmicronZoneImageSource::InstallDataset + } + (InvZoneImageSource::Artifact, Some(ArtifactHash(hash))) => { + OmicronZoneImageSource::Artifact { hash } + } + (InvZoneImageSource::InstallDataset, Some(_)) + | (InvZoneImageSource::Artifact, None) => { + bail!( + "invalid image source column combination: {:?}, {:?}", + self.image_source, + self.image_artifact_sha256 + ) + } + }; + Ok(OmicronZoneConfig { id: self.id.into(), filesystem_pool: self .filesystem_pool .map(|id| ZpoolName::new_external(id.into())), zone_type, - image_source: OmicronZoneImageSource::InstallDataset, + image_source, }) } } @@ -1983,7 +2026,7 @@ impl InvOmicronSledConfigZone { #[diesel(table_name = inv_omicron_sled_config_zone_nic)] pub struct InvOmicronSledConfigZoneNic { inv_collection_id: DbTypedUuid, - sled_config_id: Uuid, + pub sled_config_id: Uuid, pub id: Uuid, name: Name, ip: IpNetwork, diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index e01a6a34884..78e1eafa4b5 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -2443,11 +2443,12 @@ impl DataStore { public_error_from_diesel(e, ErrorHandler::Server) })?; paginator = p.found_batch(&batch, &|row| row.id); - nics.extend( - batch.into_iter().map(|found_zone_nic| { - (found_zone_nic.id, found_zone_nic) - }), - ); + nics.extend(batch.into_iter().map(|found_zone_nic| { + ( + (found_zone_nic.sled_config_id, found_zone_nic.id), + found_zone_nic, + ) + })); } nics @@ -2489,7 +2490,7 @@ impl DataStore { // inv_omicron_sled_config_zone with that id. This should // be impossible and reflects either a bug or database // corruption. - omicron_zone_nics.remove(&id).ok_or_else(|| { + omicron_zone_nics.remove(&(z.sled_config_id, id)).ok_or_else(|| { Error::internal_error(&format!( "zone {:?}: expected to find NIC {:?}, but didn't", z.id, z.nic_id @@ -3148,7 +3149,7 @@ mod test { use nexus_inventory::now_db_precision; use nexus_sled_agent_shared::inventory::{ ConfigReconcilerInventory, ConfigReconcilerInventoryResult, - ConfigReconcilerInventoryStatus, + ConfigReconcilerInventoryStatus, OmicronZoneImageSource, }; use nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_types::inventory::BaseboardId; @@ -3162,6 +3163,7 @@ mod test { use pretty_assertions::assert_eq; use std::num::NonZeroU32; use std::time::Duration; + use tufaceous_artifact::ArtifactHash; struct CollectionCounts { baseboards: usize, @@ -3911,7 +3913,7 @@ mod test { #[tokio::test] async fn test_reconciler_status_fields() { // Setup - let logctx = dev::test_setup_log("inventory_deletion"); + let logctx = dev::test_setup_log("reconciler_status_fields"); let db = TestDatabase::new_with_datastore(&logctx.log).await; let (opctx, datastore) = (db.opctx(), db.datastore()); @@ -3999,4 +4001,59 @@ mod test { db.terminate().await; logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_zone_image_source() { + // Setup + let logctx = dev::test_setup_log("zone_image_source"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Start with a representative collection. + let Representative { builder, .. } = representative(); + let mut collection = builder.build(); + + // Mutate some zones on one of the sleds to have various image sources. + { + // Find a sled that has a few zones in one of its sled configs. + let sa = collection + .sled_agents + .values_mut() + .find(|sa| { + sa.ledgered_sled_config + .as_ref() + .map_or(false, |config| config.zones.len() >= 3) + }) + .expect("at least one sled has 3 or more zones"); + let mut zones_iter = + sa.ledgered_sled_config.as_mut().unwrap().zones.iter_mut(); + + let mut z1 = zones_iter.next().expect("at least 1 zone"); + let mut z2 = zones_iter.next().expect("at least 2 zones"); + let mut z3 = zones_iter.next().expect("at least 3 zones"); + z1.image_source = OmicronZoneImageSource::InstallDataset; + z2.image_source = OmicronZoneImageSource::Artifact { + hash: ArtifactHash([0; 32]), + }; + z3.image_source = OmicronZoneImageSource::Artifact { + hash: ArtifactHash([1; 32]), + }; + eprintln!("changed image sources: {z1:?} {z2:?} {z3:?}"); + } + + // Write it to the db; read it back and check it survived. + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect("failed to insert collection"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection.id) + .await + .expect("failed to read collection back"); + assert_eq!(collection, collection_read); + + // Clean up. + db.terminate().await; + logctx.cleanup_successful(); + } } diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index b238535f81e..e5e6ecd23a4 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -44,6 +44,7 @@ define_enums! { InstanceStateEnum => "instance_state_v2", InstanceIntendedStateEnum => "instance_intended_state", InvConfigReconcilerStatusKindEnum => "inv_config_reconciler_status_kind", + InvZoneImageSourceEnum => "inv_zone_image_source", IpAttachStateEnum => "ip_attach_state", IpKindEnum => "ip_kind", IpPoolResourceTypeEnum => "ip_pool_resource_type", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index f635a2f027f..68e38df866a 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1712,6 +1712,9 @@ table! { snat_first_port -> Nullable, snat_last_port -> Nullable, filesystem_pool -> Nullable, + + image_source -> crate::enums::InvZoneImageSourceEnum, + image_artifact_sha256 -> Nullable, } } diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 9910418465c..dcd5b7df451 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3620,23 +3620,27 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( reconciler_status_kind inv_config_reconciler_status_kind NOT NULL, -- (foreign key into `inv_omicron_sled_config` table) -- only present if `reconciler_status_kind = 'running'` - reconciler_status_sled_config UUID CHECK ( + reconciler_status_sled_config UUID, + -- only present if `reconciler_status_kind != 'not-yet-run'` + reconciler_status_timestamp TIMESTAMPTZ, + -- only present if `reconciler_status_kind != 'not-yet-run'` + reconciler_status_duration_secs FLOAT, + + CONSTRAINT reconciler_status_sled_config_present_if_running CHECK ( (reconciler_status_kind = 'running' AND reconciler_status_sled_config IS NOT NULL) OR (reconciler_status_kind != 'running' AND reconciler_status_sled_config IS NULL) ), - -- only present if `reconciler_status_kind != 'not-yet-run'` - reconciler_status_timestamp TIMESTAMPTZ CHECK ( + CONSTRAINT reconciler_status_timestamp_present_unless_not_yet_run CHECK ( (reconciler_status_kind = 'not-yet-run' AND reconciler_status_timestamp IS NULL) OR (reconciler_status_kind != 'not-yet-run' AND reconciler_status_timestamp IS NOT NULL) ), - -- only present if `reconciler_status_kind != 'not-yet-run'` - reconciler_status_duration_secs FLOAT CHECK ( + CONSTRAINT reconciler_status_duration_present_unless_not_yet_run CHECK ( (reconciler_status_kind = 'not-yet-run' AND reconciler_status_duration_secs IS NULL) OR @@ -3835,6 +3839,11 @@ CREATE TYPE IF NOT EXISTS omicron.public.zone_type AS ENUM ( 'oximeter' ); +CREATE TYPE IF NOT EXISTS omicron.public.inv_zone_image_source AS ENUM ( + 'install_dataset', + 'artifact' +); + -- `zones` portion of an `OmicronSledConfig` observed from sled-agent CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( -- where this observation came from @@ -3902,6 +3911,18 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( -- Eventually, that nullability should be removed. filesystem_pool UUID, + -- zone image source + image_source omicron.public.inv_zone_image_source NOT NULL, + image_artifact_sha256 STRING(64), + + CONSTRAINT zone_image_source_artifact_hash_present CHECK ( + (image_source = 'artifact' + AND image_artifact_sha256 IS NOT NULL) + OR + (image_source != 'artifact' + AND image_artifact_sha256 IS NULL) + ), + PRIMARY KEY (inv_collection_id, sled_config_id, id) ); From 8b316e670e0563aecdb965857dc038a55416031b Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 19 May 2025 17:26:25 -0400 Subject: [PATCH 07/22] add schema migration --- nexus/db-model/src/schema_versions.rs | 3 +- nexus/tests/integration_tests/schema.rs | 25 ++++++++---- schema/crdb/dbinit.sql | 2 +- .../inventory-omicron-sled-config/README.md | 6 +++ .../inventory-omicron-sled-config/up01.sql | 22 +++++++++++ .../inventory-omicron-sled-config/up02.sql | 1 + .../inventory-omicron-sled-config/up03.sql | 1 + .../inventory-omicron-sled-config/up04.sql | 1 + .../inventory-omicron-sled-config/up05.sql | 6 +++ .../inventory-omicron-sled-config/up06.sql | 4 ++ .../inventory-omicron-sled-config/up07.sql | 2 + .../inventory-omicron-sled-config/up08.sql | 2 + .../inventory-omicron-sled-config/up09.sql | 2 + .../inventory-omicron-sled-config/up10.sql | 2 + .../inventory-omicron-sled-config/up11.sql | 2 + .../inventory-omicron-sled-config/up12.sql | 2 + .../inventory-omicron-sled-config/up13.sql | 2 + .../inventory-omicron-sled-config/up14.sql | 9 +++++ .../inventory-omicron-sled-config/up15.sql | 9 +++++ .../inventory-omicron-sled-config/up16.sql | 9 +++++ .../inventory-omicron-sled-config/up17.sql | 7 ++++ .../inventory-omicron-sled-config/up18.sql | 7 ++++ .../inventory-omicron-sled-config/up19.sql | 7 ++++ .../inventory-omicron-sled-config/up20.sql | 7 ++++ .../inventory-omicron-sled-config/up21.sql | 39 +++++++++++++++++++ .../inventory-omicron-sled-config/up22.sql | 7 ++++ .../inventory-omicron-sled-config/up23.sql | 13 +++++++ .../inventory-omicron-sled-config/up24.sql | 16 ++++++++ .../inventory-omicron-sled-config/up25.sql | 10 +++++ 29 files changed, 216 insertions(+), 9 deletions(-) create mode 100644 schema/crdb/inventory-omicron-sled-config/README.md create mode 100644 schema/crdb/inventory-omicron-sled-config/up01.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up02.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up03.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up04.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up05.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up06.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up07.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up08.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up09.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up10.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up11.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up12.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up13.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up14.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up15.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up16.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up17.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up18.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up19.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up20.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up21.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up22.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up23.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up24.sql create mode 100644 schema/crdb/inventory-omicron-sled-config/up25.sql diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 1582493e309..2a2249fff34 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(142, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(143, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(143, "inventory-omicron-sled-config"), KnownVersion::new(142, "bp-add-remove-mupdate-override"), KnownVersion::new(141, "caboose-sign-value"), KnownVersion::new(140, "instance-intended-state"), diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 9bdc3125219..0facef4b877 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -10,6 +10,7 @@ use nexus_config::SchemaConfig; use nexus_db_lookup::DataStoreConnection; use nexus_db_model::EARLIEST_SUPPORTED_VERSION; use nexus_db_model::SCHEMA_VERSION as LATEST_SCHEMA_VERSION; +use nexus_db_model::SchemaUpgradeStep; use nexus_db_model::{AllSchemaVersions, SchemaVersion}; use nexus_db_queries::db::DISALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_queries::db::pub_test_utils::TestDatabase; @@ -25,6 +26,7 @@ use pretty_assertions::{assert_eq, assert_ne}; use semver::Version; use similar_asserts; use slog::Logger; +use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::net::IpAddr; use std::sync::Mutex; @@ -64,10 +66,16 @@ async fn test_setup<'a>( // Only returns an error if the transaction failed to commit. async fn apply_update_as_transaction_inner( client: &omicron_test_utils::dev::db::Client, - sql: &str, + step: &SchemaUpgradeStep, ) -> Result<(), tokio_postgres::Error> { client.batch_execute("BEGIN;").await.expect("Failed to BEGIN transaction"); - client.batch_execute(&sql).await.expect("Failed to execute update"); + if let Err(err) = client.batch_execute(step.sql()).await { + panic!( + "Failed to execute update step {}: {}", + step.label(), + InlineErrorChain::new(&err) + ); + }; client.batch_execute("COMMIT;").await?; Ok(()) } @@ -78,13 +86,16 @@ async fn apply_update_as_transaction_inner( async fn apply_update_as_transaction( log: &Logger, client: &omicron_test_utils::dev::db::Client, - sql: &str, + step: &SchemaUpgradeStep, ) { loop { - match apply_update_as_transaction_inner(client, sql).await { + match apply_update_as_transaction_inner(client, step).await { Ok(()) => break, Err(err) => { - warn!(log, "Failed to apply update as transaction"; "err" => err.to_string()); + warn!( + log, "Failed to apply update as transaction"; + InlineErrorChain::new(&err), + ); client .batch_execute("ROLLBACK;") .await @@ -95,7 +106,7 @@ async fn apply_update_as_transaction( continue; } } - panic!("Failed to apply update: {err}"); + panic!("Failed to apply update {}: {err}", step.label()); } } } @@ -142,7 +153,7 @@ async fn apply_update( ); for _ in 0..times_to_apply { - apply_update_as_transaction(&log, &client, step.sql()).await; + apply_update_as_transaction(&log, &client, step).await; // The following is a set of "versions exempt from being // re-applied" multiple times. PLEASE AVOID ADDING TO THIS LIST. diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index dcd5b7df451..b1acbe8099b 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -5676,7 +5676,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '142.0.0', NULL) + (TRUE, NOW(), NOW(), '143.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/inventory-omicron-sled-config/README.md b/schema/crdb/inventory-omicron-sled-config/README.md new file mode 100644 index 00000000000..016815f0df8 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/README.md @@ -0,0 +1,6 @@ +This directory contains migrations that make several changes to the +representation of inventory collections. The changes are not backwards +compatible: we're transitioning from recording only each sled's +`OmicronZonesConfig` to its entire `OmicronSledConfig`; we have no way of +backfilling the missing disk and dataset config information for preexisting +collections. Therefore, these migrations start by dropping all old collections. diff --git a/schema/crdb/inventory-omicron-sled-config/up01.sql b/schema/crdb/inventory-omicron-sled-config/up01.sql new file mode 100644 index 00000000000..6291d13498a --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up01.sql @@ -0,0 +1,22 @@ +-- Delete all existing inventory collections. Inventory collections are +-- routinely pruned by Nexus, so this should only delete a handful (3-6 +-- collections total). It does mean we will have no collections until Nexus +-- collects a new one after this migration completes, but that should happen +-- soon after Nexus starts and is not a prerequisite for the control plane +-- coming online. + +set local disallow_full_table_scans = off; + +-- This omits a few tables because we're about to drop them entirely anyway. +DELETE FROM omicron.public.inv_collection WHERE 1=1; +DELETE FROM omicron.public.inv_collection_error WHERE 1=1; +DELETE FROM omicron.public.inv_service_processor WHERE 1=1; +DELETE FROM omicron.public.inv_root_of_trust WHERE 1=1; +DELETE FROM omicron.public.inv_caboose WHERE 1=1; +DELETE FROM omicron.public.inv_root_of_trust_page WHERE 1=1; +DELETE FROM omicron.public.inv_sled_agent WHERE 1=1; +DELETE FROM omicron.public.inv_physical_disk WHERE 1=1; +DELETE FROM omicron.public.inv_nvme_disk_firmware WHERE 1=1; +DELETE FROM omicron.public.inv_zpool WHERE 1=1; +DELETE FROM omicron.public.inv_dataset WHERE 1=1; +DELETE FROM omicron.public.inv_clickhouse_keeper_membership WHERE 1=1; diff --git a/schema/crdb/inventory-omicron-sled-config/up02.sql b/schema/crdb/inventory-omicron-sled-config/up02.sql new file mode 100644 index 00000000000..4bb2c9cd87c --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up02.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.inv_omicron_zone_nic; diff --git a/schema/crdb/inventory-omicron-sled-config/up03.sql b/schema/crdb/inventory-omicron-sled-config/up03.sql new file mode 100644 index 00000000000..885910058ad --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up03.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.inv_omicron_zone; diff --git a/schema/crdb/inventory-omicron-sled-config/up04.sql b/schema/crdb/inventory-omicron-sled-config/up04.sql new file mode 100644 index 00000000000..3e86e9e013b --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up04.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.inv_sled_omicron_zones; diff --git a/schema/crdb/inventory-omicron-sled-config/up05.sql b/schema/crdb/inventory-omicron-sled-config/up05.sql new file mode 100644 index 00000000000..f9aad6f539b --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up05.sql @@ -0,0 +1,6 @@ +CREATE TYPE IF NOT EXISTS omicron.public.inv_config_reconciler_status_kind +AS ENUM ( + 'not-yet-run', + 'running', + 'idle' +); diff --git a/schema/crdb/inventory-omicron-sled-config/up06.sql b/schema/crdb/inventory-omicron-sled-config/up06.sql new file mode 100644 index 00000000000..81606ff93b4 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up06.sql @@ -0,0 +1,4 @@ +CREATE TYPE IF NOT EXISTS omicron.public.inv_zone_image_source AS ENUM ( + 'install_dataset', + 'artifact' +); diff --git a/schema/crdb/inventory-omicron-sled-config/up07.sql b/schema/crdb/inventory-omicron-sled-config/up07.sql new file mode 100644 index 00000000000..08099e00357 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up07.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_sled_agent + DROP COLUMN IF EXISTS omicron_physical_disks_generation; diff --git a/schema/crdb/inventory-omicron-sled-config/up08.sql b/schema/crdb/inventory-omicron-sled-config/up08.sql new file mode 100644 index 00000000000..aa0338e9f81 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up08.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_sled_agent + ADD COLUMN IF NOT EXISTS ledgered_sled_config UUID; diff --git a/schema/crdb/inventory-omicron-sled-config/up09.sql b/schema/crdb/inventory-omicron-sled-config/up09.sql new file mode 100644 index 00000000000..08d82a608c5 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up09.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_sled_agent + ADD COLUMN IF NOT EXISTS last_reconciliation_sled_config UUID; diff --git a/schema/crdb/inventory-omicron-sled-config/up10.sql b/schema/crdb/inventory-omicron-sled-config/up10.sql new file mode 100644 index 00000000000..4d0916d2b70 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up10.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_sled_agent + ADD COLUMN IF NOT EXISTS reconciler_status_kind inv_config_reconciler_status_kind NOT NULL; diff --git a/schema/crdb/inventory-omicron-sled-config/up11.sql b/schema/crdb/inventory-omicron-sled-config/up11.sql new file mode 100644 index 00000000000..96b61433887 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up11.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_sled_agent + ADD COLUMN IF NOT EXISTS reconciler_status_sled_config UUID; diff --git a/schema/crdb/inventory-omicron-sled-config/up12.sql b/schema/crdb/inventory-omicron-sled-config/up12.sql new file mode 100644 index 00000000000..c9ef703b5eb --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up12.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_sled_agent + ADD COLUMN IF NOT EXISTS reconciler_status_timestamp TIMESTAMPTZ; diff --git a/schema/crdb/inventory-omicron-sled-config/up13.sql b/schema/crdb/inventory-omicron-sled-config/up13.sql new file mode 100644 index 00000000000..d04a88d3b42 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up13.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_sled_agent + ADD COLUMN IF NOT EXISTS reconciler_status_duration_secs FLOAT; diff --git a/schema/crdb/inventory-omicron-sled-config/up14.sql b/schema/crdb/inventory-omicron-sled-config/up14.sql new file mode 100644 index 00000000000..3720d1dcf4b --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up14.sql @@ -0,0 +1,9 @@ +ALTER TABLE omicron.public.inv_sled_agent + ADD CONSTRAINT IF NOT EXISTS + reconciler_status_sled_config_present_if_running CHECK ( + (reconciler_status_kind = 'running' + AND reconciler_status_sled_config IS NOT NULL) + OR + (reconciler_status_kind != 'running' + AND reconciler_status_sled_config IS NULL) + ); diff --git a/schema/crdb/inventory-omicron-sled-config/up15.sql b/schema/crdb/inventory-omicron-sled-config/up15.sql new file mode 100644 index 00000000000..6f6ac000ef7 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up15.sql @@ -0,0 +1,9 @@ +ALTER TABLE omicron.public.inv_sled_agent + ADD CONSTRAINT IF NOT EXISTS + reconciler_status_timestamp_present_unless_not_yet_run CHECK ( + (reconciler_status_kind = 'not-yet-run' + AND reconciler_status_timestamp IS NULL) + OR + (reconciler_status_kind != 'not-yet-run' + AND reconciler_status_timestamp IS NOT NULL) + ); diff --git a/schema/crdb/inventory-omicron-sled-config/up16.sql b/schema/crdb/inventory-omicron-sled-config/up16.sql new file mode 100644 index 00000000000..727edbb55f1 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up16.sql @@ -0,0 +1,9 @@ +ALTER TABLE omicron.public.inv_sled_agent + ADD CONSTRAINT IF NOT EXISTS + reconciler_status_duration_present_unless_not_yet_run CHECK ( + (reconciler_status_kind = 'not-yet-run' + AND reconciler_status_duration_secs IS NULL) + OR + (reconciler_status_kind != 'not-yet-run' + AND reconciler_status_duration_secs IS NOT NULL) + ); diff --git a/schema/crdb/inventory-omicron-sled-config/up17.sql b/schema/crdb/inventory-omicron-sled-config/up17.sql new file mode 100644 index 00000000000..ec098436958 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up17.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config ( + inv_collection_id UUID NOT NULL, + id UUID NOT NULL, + generation INT8 NOT NULL, + remove_mupdate_override UUID, + PRIMARY KEY (inv_collection_id, id) +); diff --git a/schema/crdb/inventory-omicron-sled-config/up18.sql b/schema/crdb/inventory-omicron-sled-config/up18.sql new file mode 100644 index 00000000000..77fb24e2ec3 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up18.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_disk_result ( + inv_collection_id UUID NOT NULL, + sled_id UUID NOT NULL, + disk_id UUID NOT NULL, + error_message TEXT, + PRIMARY KEY (inv_collection_id, sled_id, disk_id) +); diff --git a/schema/crdb/inventory-omicron-sled-config/up19.sql b/schema/crdb/inventory-omicron-sled-config/up19.sql new file mode 100644 index 00000000000..269c103025b --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up19.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_dataset_result ( + inv_collection_id UUID NOT NULL, + sled_id UUID NOT NULL, + dataset_id UUID NOT NULL, + error_message TEXT, + PRIMARY KEY (inv_collection_id, sled_id, dataset_id) +); diff --git a/schema/crdb/inventory-omicron-sled-config/up20.sql b/schema/crdb/inventory-omicron-sled-config/up20.sql new file mode 100644 index 00000000000..7652b0f5d19 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up20.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_zone_result ( + inv_collection_id UUID NOT NULL, + sled_id UUID NOT NULL, + zone_id UUID NOT NULL, + error_message TEXT, + PRIMARY KEY (inv_collection_id, sled_id, zone_id) +); diff --git a/schema/crdb/inventory-omicron-sled-config/up21.sql b/schema/crdb/inventory-omicron-sled-config/up21.sql new file mode 100644 index 00000000000..8e9e4f1dee4 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up21.sql @@ -0,0 +1,39 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( + inv_collection_id UUID NOT NULL, + sled_config_id UUID NOT NULL, + id UUID NOT NULL, + zone_type omicron.public.zone_type NOT NULL, + primary_service_ip INET NOT NULL, + primary_service_port INT4 + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, + second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + dataset_zpool_name TEXT, + nic_id UUID, + dns_gz_address INET, + dns_gz_address_index INT8, + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + filesystem_pool UUID, + image_source omicron.public.inv_zone_image_source NOT NULL, + image_artifact_sha256 STRING(64), + CONSTRAINT zone_image_source_artifact_hash_present CHECK ( + (image_source = 'artifact' + AND image_artifact_sha256 IS NOT NULL) + OR + (image_source != 'artifact' + AND image_artifact_sha256 IS NULL) + ), + PRIMARY KEY (inv_collection_id, sled_config_id, id) +); diff --git a/schema/crdb/inventory-omicron-sled-config/up22.sql b/schema/crdb/inventory-omicron-sled-config/up22.sql new file mode 100644 index 00000000000..e4eb5f19947 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up22.sql @@ -0,0 +1,7 @@ +CREATE INDEX IF NOT EXISTS inv_omicron_sled_config_zone_nic_id + ON omicron.public.inv_omicron_sled_config_zone (nic_id) + STORING ( + primary_service_ip, + second_service_ip, + snat_ip + ); diff --git a/schema/crdb/inventory-omicron-sled-config/up23.sql b/schema/crdb/inventory-omicron-sled-config/up23.sql new file mode 100644 index 00000000000..a4368e12158 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up23.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone_nic ( + inv_collection_id UUID NOT NULL, + sled_config_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + PRIMARY KEY (inv_collection_id, sled_config_id, id) +); diff --git a/schema/crdb/inventory-omicron-sled-config/up24.sql b/schema/crdb/inventory-omicron-sled-config/up24.sql new file mode 100644 index 00000000000..7654cd2051c --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up24.sql @@ -0,0 +1,16 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_dataset ( + inv_collection_id UUID NOT NULL, + sled_config_id UUID NOT NULL, + id UUID NOT NULL, + pool_id UUID NOT NULL, + kind omicron.public.dataset_kind NOT NULL, + zone_name TEXT, + quota INT8, + reservation INT8, + compression TEXT NOT NULL, + CONSTRAINT zone_name_for_zone_kind CHECK ( + (kind != 'zone') OR + (kind = 'zone' AND zone_name IS NOT NULL) + ), + PRIMARY KEY (inv_collection_id, sled_config_id, id) +); diff --git a/schema/crdb/inventory-omicron-sled-config/up25.sql b/schema/crdb/inventory-omicron-sled-config/up25.sql new file mode 100644 index 00000000000..707e494cf25 --- /dev/null +++ b/schema/crdb/inventory-omicron-sled-config/up25.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_disk ( + inv_collection_id UUID NOT NULL, + sled_config_id UUID NOT NULL, + id UUID NOT NULL, + vendor TEXT NOT NULL, + serial TEXT NOT NULL, + model TEXT NOT NULL, + pool_id UUID NOT NULL, + PRIMARY KEY (inv_collection_id, sled_config_id, id) +); From d06d6be81d507928e3bf7ba9209b3501a4539f61 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 19 May 2025 20:00:57 -0400 Subject: [PATCH 08/22] finish todo! --- dev-tools/omdb/src/bin/omdb/db.rs | 107 +++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 652e2965357..b8f7831eed8 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -125,6 +125,7 @@ use nexus_db_queries::db::queries::region_allocation; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::OmicronSledConfig; +use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneType; @@ -7423,8 +7424,112 @@ fn collect_config_reconciler_errors( } fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { + let OmicronSledConfig { + generation, + disks, + datasets, + zones, + remove_mupdate_override, + } = config; + println!("\n{label} SLED CONFIG"); - todo!("finish this method: {config:?}"); + println!(" generation: {}", generation); + println!(" remove_mupdate_override: {remove_mupdate_override:?}"); + + if disks.is_empty() { + println!(" disk config empty"); + } else { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct DiskRow { + id: Uuid, + zpool_id: Uuid, + vendor: String, + model: String, + serial: String, + } + + let rows = disks.iter().map(|d| DiskRow { + id: d.id.into_untyped_uuid(), + zpool_id: d.pool_id.into_untyped_uuid(), + vendor: d.identity.vendor.clone(), + model: d.identity.model.clone(), + serial: d.identity.serial.clone(), + }); + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(1, 1, 0, 0)) + .to_string(); + println!(" DISKS: {}", disks.len()); + println!("{table}"); + } + + if datasets.is_empty() { + println!(" dataset config empty"); + } else { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct DatasetRow { + id: Uuid, + name: String, + compression: String, + quota: String, + reservation: String, + } + + let rows = datasets.iter().map(|d| DatasetRow { + id: d.id.into_untyped_uuid(), + name: d.name.full_name(), + compression: d.inner.compression.to_string(), + quota: d + .inner + .quota + .map(|q| q.to_string()) + .unwrap_or_else(|| "none".to_string()), + reservation: d + .inner + .reservation + .map(|r| r.to_string()) + .unwrap_or_else(|| "none".to_string()), + }); + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(1, 1, 0, 0)) + .to_string(); + println!(" DATASETS: {}", datasets.len()); + println!("{table}"); + } + + if zones.is_empty() { + println!(" zone config empty"); + } else { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct ZoneRow { + id: Uuid, + kind: &'static str, + image_source: String, + } + + let rows = zones.iter().map(|z| ZoneRow { + id: z.id.into_untyped_uuid(), + kind: z.zone_type.kind().report_str(), + image_source: match &z.image_source { + OmicronZoneImageSource::InstallDataset => { + "install-dataset".to_string() + } + OmicronZoneImageSource::Artifact { hash } => { + format!("artifact: {hash}") + } + }, + }); + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(1, 1, 0, 0)) + .to_string(); + println!(" ZONES: {}", datasets.len()); + println!("{table}"); + } } fn inv_collection_print_keeper_membership(collection: &Collection) { From 446bc6114c90672c009fa296fe2d7b4f76aef365 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 19 May 2025 20:01:13 -0400 Subject: [PATCH 09/22] fixup tests --- nexus/inventory/src/examples.rs | 2 +- nexus/reconfigurator/execution/src/dns.rs | 3 ++- nexus/reconfigurator/planning/src/planner.rs | 24 ++++++++++---------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 69a72445e87..7647ef88011 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -644,7 +644,7 @@ pub fn sled_agent( ) -> Inventory { // Assume the `ledgered_sled_config` was reconciled successfully. let last_reconciliation = ledgered_sled_config.clone().map(|config| { - let external_disks = dbg!(&config) + let external_disks = config .disks .iter() .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 4f838a9dc9c..7d3b8187a2a 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -655,7 +655,8 @@ mod test { let mut blueprint_sleds = BTreeMap::new(); for (sled_id, sa) in collection.sled_agents { - let ledgered_sled_config = sa.ledgered_sled_config.unwrap(); + let ledgered_sled_config = + sa.ledgered_sled_config.unwrap_or_default(); // Convert the inventory `OmicronZonesConfig`s into // `BlueprintZoneConfig`s. This is going to get more painful over diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 3d681dda640..04b61483705 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -2048,22 +2048,10 @@ pub(crate) mod test { let mut collection = example.collection; let input = example.input; - // The initial collection configuration has generation 1 // The initial blueprint configuration has generation 2 let (sled_id, sled_config) = blueprint1.sleds.first_key_value().unwrap(); assert_eq!(sled_config.sled_agent_generation, Generation::from_u32(2)); - assert_eq!( - collection - .sled_agents - .get(&sled_id) - .unwrap() - .ledgered_sled_config - .as_ref() - .unwrap() - .generation, - Generation::new() - ); // All disks should have an `InService` disposition and `Active` state for disk in &sled_config.disks { @@ -4099,6 +4087,7 @@ pub(crate) mod test { // * same inventory as above // * inventory reports a new generation (but zone still running) // * inventory reports zone not running (but still the old generation) + eprintln!("planning with no inventory change..."); assert_planning_makes_no_changes( &logctx.log, &blueprint2, @@ -4106,6 +4095,15 @@ pub(crate) mod test { &collection, TEST_NAME, ); + // TODO-cleanup These checks depend on `last_reconciled_config`, which + // is not yet populated; uncomment these and check them by mutating + // `last_reconciled_config` once + // https://github.com/oxidecomputer/omicron/pull/8064 lands. We could + // just mutate `ledgered_sled_config` in the meantime (as this + // commented-out code does below), but that's not really checking what + // we care about. + /* + eprintln!("planning with generation bump but zone still running..."); assert_planning_makes_no_changes( &logctx.log, &blueprint2, @@ -4124,6 +4122,7 @@ pub(crate) mod test { }, TEST_NAME, ); + eprintln!("planning with zone gone but generation not bumped..."); assert_planning_makes_no_changes( &logctx.log, &blueprint2, @@ -4143,6 +4142,7 @@ pub(crate) mod test { }, TEST_NAME, ); + */ // Now make both changes to the inventory. { From b78b434997a70cf9b05c42f4fbcbfa0dbcc3166d Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 20 May 2025 10:25:36 -0400 Subject: [PATCH 10/22] fix db type namespace --- schema/crdb/dbinit.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index b1acbe8099b..320301ce260 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3617,7 +3617,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( last_reconciliation_sled_config UUID, -- Columns making up the status of the config reconciler. - reconciler_status_kind inv_config_reconciler_status_kind NOT NULL, + reconciler_status_kind omicron.public.inv_config_reconciler_status_kind NOT NULL, -- (foreign key into `inv_omicron_sled_config` table) -- only present if `reconciler_status_kind = 'running'` reconciler_status_sled_config UUID, From 87579cb15805889bd4ed77ef13e320a9421a537e Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 20 May 2025 10:36:27 -0400 Subject: [PATCH 11/22] openapi update --- openapi/sled-agent.json | 190 +++++++++++++++++++++++++++++++++------- 1 file changed, 159 insertions(+), 31 deletions(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 3aeac93c776..4279de8605d 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -3513,6 +3513,147 @@ } ] }, + "ConfigReconcilerInventory": { + "description": "Describes the last attempt made by the sled-agent-config-reconciler to reconcile the current sled config against the actual state of the sled.", + "type": "object", + "properties": { + "datasets": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ConfigReconcilerInventoryResult" + } + }, + "external_disks": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ConfigReconcilerInventoryResult" + } + }, + "last_reconciled_config": { + "$ref": "#/components/schemas/OmicronSledConfig" + }, + "zones": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ConfigReconcilerInventoryResult" + } + } + }, + "required": [ + "datasets", + "external_disks", + "last_reconciled_config", + "zones" + ] + }, + "ConfigReconcilerInventoryResult": { + "oneOf": [ + { + "type": "object", + "properties": { + "result": { + "type": "string", + "enum": [ + "ok" + ] + } + }, + "required": [ + "result" + ] + }, + { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "result": { + "type": "string", + "enum": [ + "err" + ] + } + }, + "required": [ + "message", + "result" + ] + } + ] + }, + "ConfigReconcilerInventoryStatus": { + "description": "Status of the sled-agent-config-reconciler task.", + "oneOf": [ + { + "description": "The reconciler task has not yet run for the first time since sled-agent started.", + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "not_yet_run" + ] + } + }, + "required": [ + "status" + ] + }, + { + "description": "The reconciler task is actively running.", + "type": "object", + "properties": { + "config": { + "$ref": "#/components/schemas/OmicronSledConfig" + }, + "running_for": { + "$ref": "#/components/schemas/Duration" + }, + "started_at": { + "type": "string", + "format": "date-time" + }, + "status": { + "type": "string", + "enum": [ + "running" + ] + } + }, + "required": [ + "config", + "running_for", + "started_at", + "status" + ] + }, + { + "description": "The reconciler task is currently idle, but previously did complete a reconciliation attempt.\n\nThis variant does not include the `OmicronSledConfig` used in the last attempt, because that's always available via [`ConfigReconcilerInventory::last_reconciled_config`].", + "type": "object", + "properties": { + "completed_at": { + "type": "string", + "format": "date-time" + }, + "ran_for": { + "$ref": "#/components/schemas/Duration" + }, + "status": { + "type": "string", + "enum": [ + "idle" + ] + } + }, + "required": [ + "completed_at", + "ran_for", + "status" + ] + } + ] + }, "Cpuid": { "description": "A set of CPUID values to expose to a guest.", "type": "object", @@ -4889,11 +5030,24 @@ "$ref": "#/components/schemas/InventoryDisk" } }, - "omicron_physical_disks_generation": { - "$ref": "#/components/schemas/Generation" + "last_reconciliation": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/ConfigReconcilerInventory" + } + ] }, - "omicron_zones": { - "$ref": "#/components/schemas/OmicronZonesConfig" + "ledgered_sled_config": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/OmicronSledConfig" + } + ] + }, + "reconciler_status": { + "$ref": "#/components/schemas/ConfigReconcilerInventoryStatus" }, "reservoir_size": { "$ref": "#/components/schemas/ByteCount" @@ -4926,8 +5080,7 @@ "baseboard", "datasets", "disks", - "omicron_physical_disks_generation", - "omicron_zones", + "reconciler_status", "reservoir_size", "sled_agent_address", "sled_id", @@ -5991,31 +6144,6 @@ } ] }, - "OmicronZonesConfig": { - "description": "Describes the set of Omicron-managed zones running on a sled", - "type": "object", - "properties": { - "generation": { - "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - }, - "zones": { - "description": "list of running zones", - "type": "array", - "items": { - "$ref": "#/components/schemas/OmicronZoneConfig" - } - } - }, - "required": [ - "generation", - "zones" - ] - }, "P9fs": { "description": "Describes a filesystem to expose through a P9 device.\n\nThis is only supported by Propolis servers compiled with the `falcon` feature.", "type": "object", From 9f13465a7e0d08291bbf16831463c37a49fcf962 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 20 May 2025 10:39:33 -0400 Subject: [PATCH 12/22] expectorate --- nexus/inventory/tests/output/collector_basic.txt | 14 ++++++++++---- .../tests/output/collector_sled_agent_errors.txt | 7 +++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt index b3b70755574..3eed7f61adb 100644 --- a/nexus/inventory/tests/output/collector_basic.txt +++ b/nexus/inventory/tests/output/collector_basic.txt @@ -89,13 +89,19 @@ rot pages found: sled agents found: sled 03265caf-da7d-46c7-b1c2-39fa90ce5c65 (Scrimlet) baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-03265caf-da7d-46c7-b1c2-39fa90ce5c65" }) - zone generation: Generation(3) - zones found: + ledgered sled config: + generation: 3 + remove_mupdate_override: None zone 8b88a56f-3eb6-4d80-ba42-75d867bc427d type oximeter + no completed reconciliation + reconciler task not yet run sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Scrimlet) baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) - zone generation: Generation(3) - zones found: + ledgered sled config: + generation: 3 + remove_mupdate_override: None zone 5125277f-0988-490b-ac01-3bba20cc8f07 type oximeter + no completed reconciliation + reconciler task not yet run errors: diff --git a/nexus/inventory/tests/output/collector_sled_agent_errors.txt b/nexus/inventory/tests/output/collector_sled_agent_errors.txt index 113d5c89c08..e4f3efc8739 100644 --- a/nexus/inventory/tests/output/collector_sled_agent_errors.txt +++ b/nexus/inventory/tests/output/collector_sled_agent_errors.txt @@ -88,9 +88,12 @@ rot pages found: sled agents found: sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Scrimlet) baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) - zone generation: Generation(3) - zones found: + ledgered sled config: + generation: 3 + remove_mupdate_override: None zone 5125277f-0988-490b-ac01-3bba20cc8f07 type oximeter + no completed reconciliation + reconciler task not yet run errors: error: Sled Agent "http://[100::1]:45678": inventory: Communication Error <> From 527421491ca1243386f7a9a5b79a874ef503b4ff Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 20 May 2025 13:07:42 -0400 Subject: [PATCH 13/22] report empty disk/dataset configs before RSS --- sled-agent/src/sim/sled_agent.rs | 7 +++---- sled-agent/src/sled_agent.rs | 22 ++++++++++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 977d6145e07..61adf7e0c59 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -729,11 +729,10 @@ impl SledAgent { let storage = self.storage.lock(); - let disks_config = storage - .omicron_physical_disks_list() - .context("list disks config")?; + let disks_config = + storage.omicron_physical_disks_list().unwrap_or_default(); let datasets_config = - storage.datasets_config_list().context("list datasets config")?; + storage.datasets_config_list().unwrap_or_default(); let zones_config = self.fake_zones.lock().unwrap().clone(); let sled_config = OmicronSledConfig { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 7e0c9226bf7..cff825ba474 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1368,10 +1368,24 @@ impl SledAgent { self.storage().datasets_config_list(), self.inner.services.omicron_zones_list() ); - let disks_config = - disks_config.map_err(InventoryError::GetDisksConfig)?; - let datasets_config = - datasets_config.map_err(InventoryError::GetDatasetsConfig)?; + + // RSS asks for our inventory _before_ it sends us an + // `OmicronSledConfig`; echo back the default (empty) disk and dataset + // configs if we have no ledger at all. + let disks_config = match disks_config { + Ok(disks_config) => disks_config, + Err(sled_storage::error::Error::LedgerNotFound) => { + OmicronPhysicalDisksConfig::default() + } + Err(err) => return Err(InventoryError::GetDisksConfig(err)), + }; + let datasets_config = match datasets_config { + Ok(datasets_config) => datasets_config, + Err(sled_storage::error::Error::LedgerNotFound) => { + DatasetsConfig::default() + } + Err(err) => return Err(InventoryError::GetDatasetsConfig(err)), + }; for (identity, variant, slot, firmware) in all_disks.iter_all() { disks.push(InventoryDisk { From 4fe5d13750faf245adccd69c4622991d84092900 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 20 May 2025 14:42:46 -0400 Subject: [PATCH 14/22] omdb output cleanup --- dev-tools/omdb/src/bin/omdb/db.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index b8f7831eed8..852c49c2579 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -7458,7 +7458,7 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { }); let table = tabled::Table::new(rows) .with(tabled::settings::Style::empty()) - .with(tabled::settings::Padding::new(1, 1, 0, 0)) + .with(tabled::settings::Padding::new(8, 1, 0, 0)) .to_string(); println!(" DISKS: {}", disks.len()); println!("{table}"); @@ -7494,7 +7494,7 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { }); let table = tabled::Table::new(rows) .with(tabled::settings::Style::empty()) - .with(tabled::settings::Padding::new(1, 1, 0, 0)) + .with(tabled::settings::Padding::new(8, 1, 0, 0)) .to_string(); println!(" DATASETS: {}", datasets.len()); println!("{table}"); @@ -7525,9 +7525,9 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { }); let table = tabled::Table::new(rows) .with(tabled::settings::Style::empty()) - .with(tabled::settings::Padding::new(1, 1, 0, 0)) + .with(tabled::settings::Padding::new(8, 1, 0, 0)) .to_string(); - println!(" ZONES: {}", datasets.len()); + println!(" ZONES: {}", zones.len()); println!("{table}"); } } From 15738d60d2071daa5e8101b55d749cb5e6eed602 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 10:52:45 -0400 Subject: [PATCH 15/22] use strongly-typed IDs in Tabled structs --- dev-tools/omdb/src/bin/omdb/db.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 852c49c2579..4eb7c64e2a6 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -152,6 +152,7 @@ use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::DownstairsRegionUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::ParseError; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::PropolisUuid; @@ -7442,16 +7443,16 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct DiskRow { - id: Uuid, - zpool_id: Uuid, + id: PhysicalDiskUuid, + zpool_id: ZpoolUuid, vendor: String, model: String, serial: String, } let rows = disks.iter().map(|d| DiskRow { - id: d.id.into_untyped_uuid(), - zpool_id: d.pool_id.into_untyped_uuid(), + id: d.id, + zpool_id: d.pool_id, vendor: d.identity.vendor.clone(), model: d.identity.model.clone(), serial: d.identity.serial.clone(), @@ -7470,7 +7471,7 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct DatasetRow { - id: Uuid, + id: DatasetUuid, name: String, compression: String, quota: String, @@ -7478,7 +7479,7 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { } let rows = datasets.iter().map(|d| DatasetRow { - id: d.id.into_untyped_uuid(), + id: d.id, name: d.name.full_name(), compression: d.inner.compression.to_string(), quota: d @@ -7506,13 +7507,13 @@ fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) { #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct ZoneRow { - id: Uuid, + id: OmicronZoneUuid, kind: &'static str, image_source: String, } let rows = zones.iter().map(|z| ZoneRow { - id: z.id.into_untyped_uuid(), + id: z.id, kind: z.zone_type.kind().report_str(), image_source: match &z.image_source { OmicronZoneImageSource::InstallDataset => { From 769d98e434908ec470d6ef416c86c20ebdcac51c Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 10:59:59 -0400 Subject: [PATCH 16/22] expand comment --- .../db-queries/src/db/datastore/inventory.rs | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 78e1eafa4b5..36ba7550346 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -213,10 +213,25 @@ impl DataStore { }) .collect(); - // Build up a list of `OmicronSledConfig`s we need to insert (each sled - // has 0-3), all of their zones, zone NICs, datasets, and disks, and - // also collect the config reconciler properties for each sled. We need - // these to construct `InvSledAgent` instances below. + // Build up a list of `OmicronSledConfig`s we need to insert. Each sled + // has 0-3: + // + // * The ledgered sled config (if the sled has gotten a config from RSS + // or Nexus) + // * The most-recently-reconciled config (if the sled-agent's config + // reconciler has run since the last time it started) + // * The currently-being-reconciled config (if the sled-agent's config + // reconciler was actively running when inventory was collected) + // + // If more than one of these are present, they may be equal or distinct; + // for any that are equal, we'll only insert one copy and reuse the + // foreign key ID for subsequent instances. + // + // For each of these configs, we need to insert the top-level scalar + // data (held in an `InvOmicronSledConfig`), plus all of their zones, + // zone NICs, datasets, and disks, and we need collect the config + // reconciler properties for each sled. We need all of this to construct + // `InvSledAgent` instances below. let ConfigReconcilerRows { sled_configs: omicron_sled_configs, disks: omicron_sled_config_disks, From f97bb7003e83b3090e6a88fce1e2601082f5411f Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 11:00:07 -0400 Subject: [PATCH 17/22] remove dead code --- .../db-queries/src/db/datastore/inventory.rs | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 36ba7550346..b5f541f5522 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -1162,34 +1162,6 @@ impl DataStore { .await?; } - /* - // Insert all the Omicron zones that we found. - { - use nexus_db_schema::schema::inv_sled_omicron_zones::dsl as sled_zones; - let _ = diesel::insert_into(sled_zones::inv_sled_omicron_zones) - .values(sled_omicron_zones) - .execute_async(&conn) - .await?; - } - - { - use nexus_db_schema::schema::inv_omicron_zone::dsl as omicron_zone; - let _ = diesel::insert_into(omicron_zone::inv_omicron_zone) - .values(omicron_zones) - .execute_async(&conn) - .await?; - } - - { - use nexus_db_schema::schema::inv_omicron_zone_nic::dsl as omicron_zone_nic; - let _ = - diesel::insert_into(omicron_zone_nic::inv_omicron_zone_nic) - .values(omicron_zone_nics) - .execute_async(&conn) - .await?; - } - */ - // Insert the clickhouse keeper memberships we've received { use nexus_db_schema::schema::inv_clickhouse_keeper_membership::dsl; From 577483b823a7bd0df1a771fc7321a516da6bed12 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 11:04:50 -0400 Subject: [PATCH 18/22] struct with named fields over huge tuple --- .../db-queries/src/db/datastore/inventory.rs | 41 +++++++++++++++---- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index b5f541f5522..13dc34b2384 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -1430,7 +1430,32 @@ impl DataStore { let conn = self.pool_connection_authorized(opctx).await?; let db_collection_id = to_db_typed_uuid(collection_id); - let ( + // Helper to pack and unpack all the counts of rows we delete in the + // transaction below (exclusively used for logging). + struct NumRowsDeleted { + ncollections: usize, + nsps: usize, + nrots: usize, + ncabooses: usize, + nrot_pages: usize, + nsled_agents: usize, + ndatasets: usize, + nphysical_disks: usize, + nnvme_disk_firmware: usize, + nlast_reconciliation_disk_results: usize, + nlast_reconciliation_dataset_results: usize, + nlast_reconciliation_zone_results: usize, + nomicron_sled_configs: usize, + nomicron_sled_config_disks: usize, + nomicron_sled_config_datasets: usize, + nomicron_sled_config_zones: usize, + nomicron_sled_config_zone_nics: usize, + nzpools: usize, + nerrors: usize, + nclickhouse_keeper_membership: usize, + } + + let NumRowsDeleted { ncollections, nsps, nrots, @@ -1439,7 +1464,7 @@ impl DataStore { nsled_agents, ndatasets, nphysical_disks, - nnvme_disk_disk_firmware, + nnvme_disk_firmware, nlast_reconciliation_disk_results, nlast_reconciliation_dataset_results, nlast_reconciliation_zone_results, @@ -1451,7 +1476,7 @@ impl DataStore { nzpools, nerrors, nclickhouse_keeper_membership, - ) = + } = self.transaction_retry_wrapper("inventory_delete_collection") .transaction(&conn, |conn| async move { // Remove the record describing the collection itself. @@ -1536,7 +1561,7 @@ impl DataStore { }; // Remove rows for NVMe physical disk firmware found. - let nnvme_disk_firwmare = { + let nnvme_disk_firmware = { use nexus_db_schema::schema::inv_nvme_disk_firmware::dsl; diesel::delete(dsl::inv_nvme_disk_firmware.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1645,7 +1670,7 @@ impl DataStore { .await? }; - Ok(( + Ok(NumRowsDeleted { ncollections, nsps, nrots, @@ -1654,7 +1679,7 @@ impl DataStore { nsled_agents, ndatasets, nphysical_disks, - nnvme_disk_firwmare, + nnvme_disk_firmware, nlast_reconciliation_disk_results, nlast_reconciliation_dataset_results, nlast_reconciliation_zone_results, @@ -1666,7 +1691,7 @@ impl DataStore { nzpools, nerrors, nclickhouse_keeper_membership, - )) + }) }) .await .map_err(|error| { @@ -1683,7 +1708,7 @@ impl DataStore { "nsled_agents" => nsled_agents, "ndatasets" => ndatasets, "nphysical_disks" => nphysical_disks, - "nnvme_disk_firmware" => nnvme_disk_disk_firmware, + "nnvme_disk_firmware" => nnvme_disk_firmware, "nlast_reconciliation_disk_results" => nlast_reconciliation_disk_results, "nlast_reconciliation_dataset_results" => From 5a9362296fc5bc24d982035edb77351e8237fa27 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 11:06:12 -0400 Subject: [PATCH 19/22] slf -> this --- nexus/db-queries/src/db/datastore/inventory.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 13dc34b2384..d25cf0279ab 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -2916,11 +2916,11 @@ impl ConfigReconcilerRows { collection_id: CollectionUuid, collection: &Collection, ) -> anyhow::Result { - let mut slf = Self::default(); + let mut this = Self::default(); for (sled_id, sled_agent) in &collection.sled_agents { - slf.accumulate(collection_id, *sled_id, sled_agent)?; + this.accumulate(collection_id, *sled_id, sled_agent)?; } - Ok(slf) + Ok(this) } fn accumulate( From a7c62c524e8fd03fa80388dfedce423c2892511d Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 11:10:25 -0400 Subject: [PATCH 20/22] clearer InvOmicronSledConfig::new() --- nexus/db-model/src/inventory.rs | 11 ++++++----- nexus/db-queries/src/db/datastore/inventory.rs | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 1a9de2994a6..79fb12bb137 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -45,6 +45,7 @@ use nexus_types::inventory::{ BaseboardId, Caboose, Collection, NvmeFirmware, PowerState, RotPage, RotSlot, }; +use omicron_common::api::external; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetName; @@ -55,6 +56,7 @@ use omicron_uuid_kinds::DatasetKind; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::MupdateOverrideKind; +use omicron_uuid_kinds::MupdateOverrideUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::SledUuid; @@ -1413,15 +1415,14 @@ impl InvOmicronSledConfig { pub fn new( inv_collection_id: CollectionUuid, id: Uuid, - config: &OmicronSledConfig, + generation: external::Generation, + remove_mupdate_override: Option, ) -> Self { Self { inv_collection_id: inv_collection_id.into(), id, - generation: Generation(config.generation), - remove_mupdate_override: config - .remove_mupdate_override - .map(From::from), + generation: Generation(generation), + remove_mupdate_override: remove_mupdate_override.map(From::from), } } } diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index d25cf0279ab..1491658a416 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -3063,7 +3063,8 @@ impl ConfigReconcilerRows { self.sled_configs.push(InvOmicronSledConfig::new( collection_id, sled_config_id, - config, + config.generation, + config.remove_mupdate_override, )); self.disks.extend(config.disks.iter().map(|disk| { InvOmicronSledConfigDisk::new( From 7ee7e6eef7f7a780fd656c8a4dc67cd26736e632 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 11:23:33 -0400 Subject: [PATCH 21/22] combine closely-related CHECK constraints --- schema/crdb/dbinit.sql | 11 ++--- .../inventory-omicron-sled-config/up15.sql | 8 ++-- .../inventory-omicron-sled-config/up16.sql | 16 +++---- .../inventory-omicron-sled-config/up17.sql | 10 ++-- .../inventory-omicron-sled-config/up18.sql | 6 +-- .../inventory-omicron-sled-config/up19.sql | 6 +-- .../inventory-omicron-sled-config/up20.sql | 42 +++++++++++++++-- .../inventory-omicron-sled-config/up21.sql | 46 +++---------------- .../inventory-omicron-sled-config/up22.sql | 20 +++++--- .../inventory-omicron-sled-config/up23.sql | 19 ++++---- .../inventory-omicron-sled-config/up24.sql | 14 ++---- .../inventory-omicron-sled-config/up25.sql | 10 ---- 12 files changed, 98 insertions(+), 110 deletions(-) delete mode 100644 schema/crdb/inventory-omicron-sled-config/up25.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 320301ce260..4f1b310089f 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3633,18 +3633,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( (reconciler_status_kind != 'running' AND reconciler_status_sled_config IS NULL) ), - CONSTRAINT reconciler_status_timestamp_present_unless_not_yet_run CHECK ( - (reconciler_status_kind = 'not-yet-run' - AND reconciler_status_timestamp IS NULL) - OR - (reconciler_status_kind != 'not-yet-run' - AND reconciler_status_timestamp IS NOT NULL) - ), - CONSTRAINT reconciler_status_duration_present_unless_not_yet_run CHECK ( + CONSTRAINT reconciler_status_timing_present_unless_not_yet_run CHECK ( (reconciler_status_kind = 'not-yet-run' + AND reconciler_status_timestamp IS NULL AND reconciler_status_duration_secs IS NULL) OR (reconciler_status_kind != 'not-yet-run' + AND reconciler_status_timestamp IS NOT NULL AND reconciler_status_duration_secs IS NOT NULL) ), diff --git a/schema/crdb/inventory-omicron-sled-config/up15.sql b/schema/crdb/inventory-omicron-sled-config/up15.sql index 6f6ac000ef7..f17e4ae9a47 100644 --- a/schema/crdb/inventory-omicron-sled-config/up15.sql +++ b/schema/crdb/inventory-omicron-sled-config/up15.sql @@ -1,9 +1,11 @@ ALTER TABLE omicron.public.inv_sled_agent ADD CONSTRAINT IF NOT EXISTS - reconciler_status_timestamp_present_unless_not_yet_run CHECK ( + reconciler_status_timing_present_unless_not_yet_run CHECK ( (reconciler_status_kind = 'not-yet-run' - AND reconciler_status_timestamp IS NULL) + AND reconciler_status_timestamp IS NULL + AND reconciler_status_duration_secs IS NULL) OR (reconciler_status_kind != 'not-yet-run' - AND reconciler_status_timestamp IS NOT NULL) + AND reconciler_status_timestamp IS NOT NULL + AND reconciler_status_duration_secs IS NOT NULL) ); diff --git a/schema/crdb/inventory-omicron-sled-config/up16.sql b/schema/crdb/inventory-omicron-sled-config/up16.sql index 727edbb55f1..ec098436958 100644 --- a/schema/crdb/inventory-omicron-sled-config/up16.sql +++ b/schema/crdb/inventory-omicron-sled-config/up16.sql @@ -1,9 +1,7 @@ -ALTER TABLE omicron.public.inv_sled_agent - ADD CONSTRAINT IF NOT EXISTS - reconciler_status_duration_present_unless_not_yet_run CHECK ( - (reconciler_status_kind = 'not-yet-run' - AND reconciler_status_duration_secs IS NULL) - OR - (reconciler_status_kind != 'not-yet-run' - AND reconciler_status_duration_secs IS NOT NULL) - ); +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config ( + inv_collection_id UUID NOT NULL, + id UUID NOT NULL, + generation INT8 NOT NULL, + remove_mupdate_override UUID, + PRIMARY KEY (inv_collection_id, id) +); diff --git a/schema/crdb/inventory-omicron-sled-config/up17.sql b/schema/crdb/inventory-omicron-sled-config/up17.sql index ec098436958..77fb24e2ec3 100644 --- a/schema/crdb/inventory-omicron-sled-config/up17.sql +++ b/schema/crdb/inventory-omicron-sled-config/up17.sql @@ -1,7 +1,7 @@ -CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config ( +CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_disk_result ( inv_collection_id UUID NOT NULL, - id UUID NOT NULL, - generation INT8 NOT NULL, - remove_mupdate_override UUID, - PRIMARY KEY (inv_collection_id, id) + sled_id UUID NOT NULL, + disk_id UUID NOT NULL, + error_message TEXT, + PRIMARY KEY (inv_collection_id, sled_id, disk_id) ); diff --git a/schema/crdb/inventory-omicron-sled-config/up18.sql b/schema/crdb/inventory-omicron-sled-config/up18.sql index 77fb24e2ec3..269c103025b 100644 --- a/schema/crdb/inventory-omicron-sled-config/up18.sql +++ b/schema/crdb/inventory-omicron-sled-config/up18.sql @@ -1,7 +1,7 @@ -CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_disk_result ( +CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_dataset_result ( inv_collection_id UUID NOT NULL, sled_id UUID NOT NULL, - disk_id UUID NOT NULL, + dataset_id UUID NOT NULL, error_message TEXT, - PRIMARY KEY (inv_collection_id, sled_id, disk_id) + PRIMARY KEY (inv_collection_id, sled_id, dataset_id) ); diff --git a/schema/crdb/inventory-omicron-sled-config/up19.sql b/schema/crdb/inventory-omicron-sled-config/up19.sql index 269c103025b..7652b0f5d19 100644 --- a/schema/crdb/inventory-omicron-sled-config/up19.sql +++ b/schema/crdb/inventory-omicron-sled-config/up19.sql @@ -1,7 +1,7 @@ -CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_dataset_result ( +CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_zone_result ( inv_collection_id UUID NOT NULL, sled_id UUID NOT NULL, - dataset_id UUID NOT NULL, + zone_id UUID NOT NULL, error_message TEXT, - PRIMARY KEY (inv_collection_id, sled_id, dataset_id) + PRIMARY KEY (inv_collection_id, sled_id, zone_id) ); diff --git a/schema/crdb/inventory-omicron-sled-config/up20.sql b/schema/crdb/inventory-omicron-sled-config/up20.sql index 7652b0f5d19..8e9e4f1dee4 100644 --- a/schema/crdb/inventory-omicron-sled-config/up20.sql +++ b/schema/crdb/inventory-omicron-sled-config/up20.sql @@ -1,7 +1,39 @@ -CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_zone_result ( +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( inv_collection_id UUID NOT NULL, - sled_id UUID NOT NULL, - zone_id UUID NOT NULL, - error_message TEXT, - PRIMARY KEY (inv_collection_id, sled_id, zone_id) + sled_config_id UUID NOT NULL, + id UUID NOT NULL, + zone_type omicron.public.zone_type NOT NULL, + primary_service_ip INET NOT NULL, + primary_service_port INT4 + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, + second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + dataset_zpool_name TEXT, + nic_id UUID, + dns_gz_address INET, + dns_gz_address_index INT8, + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + filesystem_pool UUID, + image_source omicron.public.inv_zone_image_source NOT NULL, + image_artifact_sha256 STRING(64), + CONSTRAINT zone_image_source_artifact_hash_present CHECK ( + (image_source = 'artifact' + AND image_artifact_sha256 IS NOT NULL) + OR + (image_source != 'artifact' + AND image_artifact_sha256 IS NULL) + ), + PRIMARY KEY (inv_collection_id, sled_config_id, id) ); diff --git a/schema/crdb/inventory-omicron-sled-config/up21.sql b/schema/crdb/inventory-omicron-sled-config/up21.sql index 8e9e4f1dee4..e4eb5f19947 100644 --- a/schema/crdb/inventory-omicron-sled-config/up21.sql +++ b/schema/crdb/inventory-omicron-sled-config/up21.sql @@ -1,39 +1,7 @@ -CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone ( - inv_collection_id UUID NOT NULL, - sled_config_id UUID NOT NULL, - id UUID NOT NULL, - zone_type omicron.public.zone_type NOT NULL, - primary_service_ip INET NOT NULL, - primary_service_port INT4 - CHECK (primary_service_port BETWEEN 0 AND 65535) - NOT NULL, - second_service_ip INET, - second_service_port INT4 - CHECK (second_service_port IS NULL - OR second_service_port BETWEEN 0 AND 65535), - dataset_zpool_name TEXT, - nic_id UUID, - dns_gz_address INET, - dns_gz_address_index INT8, - ntp_ntp_servers TEXT[], - ntp_dns_servers INET[], - ntp_domain TEXT, - nexus_external_tls BOOLEAN, - nexus_external_dns_servers INET ARRAY, - snat_ip INET, - snat_first_port INT4 - CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), - snat_last_port INT4 - CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), - filesystem_pool UUID, - image_source omicron.public.inv_zone_image_source NOT NULL, - image_artifact_sha256 STRING(64), - CONSTRAINT zone_image_source_artifact_hash_present CHECK ( - (image_source = 'artifact' - AND image_artifact_sha256 IS NOT NULL) - OR - (image_source != 'artifact' - AND image_artifact_sha256 IS NULL) - ), - PRIMARY KEY (inv_collection_id, sled_config_id, id) -); +CREATE INDEX IF NOT EXISTS inv_omicron_sled_config_zone_nic_id + ON omicron.public.inv_omicron_sled_config_zone (nic_id) + STORING ( + primary_service_ip, + second_service_ip, + snat_ip + ); diff --git a/schema/crdb/inventory-omicron-sled-config/up22.sql b/schema/crdb/inventory-omicron-sled-config/up22.sql index e4eb5f19947..a4368e12158 100644 --- a/schema/crdb/inventory-omicron-sled-config/up22.sql +++ b/schema/crdb/inventory-omicron-sled-config/up22.sql @@ -1,7 +1,13 @@ -CREATE INDEX IF NOT EXISTS inv_omicron_sled_config_zone_nic_id - ON omicron.public.inv_omicron_sled_config_zone (nic_id) - STORING ( - primary_service_ip, - second_service_ip, - snat_ip - ); +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone_nic ( + inv_collection_id UUID NOT NULL, + sled_config_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + PRIMARY KEY (inv_collection_id, sled_config_id, id) +); diff --git a/schema/crdb/inventory-omicron-sled-config/up23.sql b/schema/crdb/inventory-omicron-sled-config/up23.sql index a4368e12158..7654cd2051c 100644 --- a/schema/crdb/inventory-omicron-sled-config/up23.sql +++ b/schema/crdb/inventory-omicron-sled-config/up23.sql @@ -1,13 +1,16 @@ -CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_zone_nic ( +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_dataset ( inv_collection_id UUID NOT NULL, sled_config_id UUID NOT NULL, id UUID NOT NULL, - name TEXT NOT NULL, - ip INET NOT NULL, - mac INT8 NOT NULL, - subnet INET NOT NULL, - vni INT8 NOT NULL, - is_primary BOOLEAN NOT NULL, - slot INT2 NOT NULL, + pool_id UUID NOT NULL, + kind omicron.public.dataset_kind NOT NULL, + zone_name TEXT, + quota INT8, + reservation INT8, + compression TEXT NOT NULL, + CONSTRAINT zone_name_for_zone_kind CHECK ( + (kind != 'zone') OR + (kind = 'zone' AND zone_name IS NOT NULL) + ), PRIMARY KEY (inv_collection_id, sled_config_id, id) ); diff --git a/schema/crdb/inventory-omicron-sled-config/up24.sql b/schema/crdb/inventory-omicron-sled-config/up24.sql index 7654cd2051c..707e494cf25 100644 --- a/schema/crdb/inventory-omicron-sled-config/up24.sql +++ b/schema/crdb/inventory-omicron-sled-config/up24.sql @@ -1,16 +1,10 @@ -CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_dataset ( +CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_disk ( inv_collection_id UUID NOT NULL, sled_config_id UUID NOT NULL, id UUID NOT NULL, + vendor TEXT NOT NULL, + serial TEXT NOT NULL, + model TEXT NOT NULL, pool_id UUID NOT NULL, - kind omicron.public.dataset_kind NOT NULL, - zone_name TEXT, - quota INT8, - reservation INT8, - compression TEXT NOT NULL, - CONSTRAINT zone_name_for_zone_kind CHECK ( - (kind != 'zone') OR - (kind = 'zone' AND zone_name IS NOT NULL) - ), PRIMARY KEY (inv_collection_id, sled_config_id, id) ); diff --git a/schema/crdb/inventory-omicron-sled-config/up25.sql b/schema/crdb/inventory-omicron-sled-config/up25.sql deleted file mode 100644 index 707e494cf25..00000000000 --- a/schema/crdb/inventory-omicron-sled-config/up25.sql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_sled_config_disk ( - inv_collection_id UUID NOT NULL, - sled_config_id UUID NOT NULL, - id UUID NOT NULL, - vendor TEXT NOT NULL, - serial TEXT NOT NULL, - model TEXT NOT NULL, - pool_id UUID NOT NULL, - PRIMARY KEY (inv_collection_id, sled_config_id, id) -); From ddea0c803cf0f958a305a3af22c23b1dd4a5b3ed Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 14:33:57 -0400 Subject: [PATCH 22/22] add and use strongly-typed OmicronSledConfigUuid --- nexus/db-model/src/inventory.rs | 53 ++++++----- .../db-queries/src/db/datastore/inventory.rs | 88 ++++++++++++------- uuid-kinds/src/lib.rs | 5 ++ 3 files changed, 92 insertions(+), 54 deletions(-) diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 79fb12bb137..eb41c2091f6 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -57,6 +57,8 @@ use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::MupdateOverrideKind; use omicron_uuid_kinds::MupdateOverrideUuid; +use omicron_uuid_kinds::OmicronSledConfigKind; +use omicron_uuid_kinds::OmicronSledConfigUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::SledUuid; @@ -811,11 +813,12 @@ pub struct InvSledAgent { pub usable_physical_ram: ByteCount, pub reservoir_size: ByteCount, // Soft foreign key to an `InvOmicronSledConfig` - pub ledgered_sled_config: Option, + pub ledgered_sled_config: Option>, // Soft foreign key to an `InvOmicronSledConfig`. May or may not be the same // as `ledgered_sled_config`. - pub last_reconciliation_sled_config: Option, + pub last_reconciliation_sled_config: + Option>, #[diesel(embed)] pub reconciler_status: InvConfigReconcilerStatus, @@ -829,7 +832,8 @@ pub struct InvConfigReconcilerStatus { // Soft foreign key to an `InvOmicronSledConfig`. May or may not be the same // as either of the other sled config keys above. Only populated if // `reconciler_status_kind` is `Running`. - pub reconciler_status_sled_config: Option, + pub reconciler_status_sled_config: + Option>, // Interpretation varies based on `reconciler_status_kind`: // // * `NotYetRun` - always `None` @@ -856,7 +860,7 @@ impl InvConfigReconcilerStatus { get_config: F, ) -> anyhow::Result where - F: FnOnce(&Uuid) -> Option, + F: FnOnce(&OmicronSledConfigUuid) -> Option, { let status = match self.reconciler_status_kind { InvConfigReconcilerStatusKind::NotYetRun => { @@ -866,7 +870,7 @@ impl InvConfigReconcilerStatus { let config_id = self.reconciler_status_sled_config.context( "missing reconciler status sled config for kind 'running'", )?; - let config = get_config(&config_id) + let config = get_config(&config_id.into()) .context("missing sled config we should have fetched")?; ConfigReconcilerInventoryStatus::Running { config, @@ -918,8 +922,8 @@ impl InvSledAgent { pub fn new_without_baseboard( collection_id: CollectionUuid, sled_agent: &nexus_types::inventory::SledAgent, - ledgered_sled_config: Option, - last_reconciliation_sled_config: Option, + ledgered_sled_config: Option, + last_reconciliation_sled_config: Option, reconciler_status: InvConfigReconcilerStatus, ) -> Result { // It's irritating to have to check this case at runtime. The challenge @@ -958,8 +962,9 @@ impl InvSledAgent { sled_agent.usable_physical_ram, ), reservoir_size: ByteCount::from(sled_agent.reservoir_size), - ledgered_sled_config, - last_reconciliation_sled_config, + ledgered_sled_config: ledgered_sled_config.map(From::from), + last_reconciliation_sled_config: + last_reconciliation_sled_config.map(From::from), reconciler_status, }) } @@ -1406,7 +1411,7 @@ impl From for nexus_types::inventory::Dataset { #[diesel(table_name = inv_omicron_sled_config)] pub struct InvOmicronSledConfig { pub inv_collection_id: DbTypedUuid, - pub id: Uuid, + pub id: DbTypedUuid, pub generation: Generation, pub remove_mupdate_override: Option>, } @@ -1414,13 +1419,13 @@ pub struct InvOmicronSledConfig { impl InvOmicronSledConfig { pub fn new( inv_collection_id: CollectionUuid, - id: Uuid, + id: OmicronSledConfigUuid, generation: external::Generation, remove_mupdate_override: Option, ) -> Self { Self { inv_collection_id: inv_collection_id.into(), - id, + id: id.into(), generation: Generation(generation), remove_mupdate_override: remove_mupdate_override.map(From::from), } @@ -1513,7 +1518,7 @@ impl From for ZoneType { #[diesel(table_name = inv_omicron_sled_config_disk)] pub struct InvOmicronSledConfigDisk { pub inv_collection_id: DbTypedUuid, - pub sled_config_id: Uuid, + pub sled_config_id: DbTypedUuid, pub id: DbTypedUuid, pub vendor: String, @@ -1526,12 +1531,12 @@ pub struct InvOmicronSledConfigDisk { impl InvOmicronSledConfigDisk { pub fn new( inv_collection_id: CollectionUuid, - sled_config_id: Uuid, + sled_config_id: OmicronSledConfigUuid, disk_config: OmicronPhysicalDiskConfig, ) -> Self { Self { inv_collection_id: inv_collection_id.into(), - sled_config_id, + sled_config_id: sled_config_id.into(), id: disk_config.id.into(), vendor: disk_config.identity.vendor, serial: disk_config.identity.serial, @@ -1560,7 +1565,7 @@ impl From for OmicronPhysicalDiskConfig { #[diesel(table_name = inv_omicron_sled_config_dataset)] pub struct InvOmicronSledConfigDataset { pub inv_collection_id: DbTypedUuid, - pub sled_config_id: Uuid, + pub sled_config_id: DbTypedUuid, pub id: DbTypedUuid, pub pool_id: DbTypedUuid, @@ -1575,12 +1580,12 @@ pub struct InvOmicronSledConfigDataset { impl InvOmicronSledConfigDataset { pub fn new( inv_collection_id: CollectionUuid, - sled_config_id: Uuid, + sled_config_id: OmicronSledConfigUuid, dataset_config: &DatasetConfig, ) -> Self { Self { inv_collection_id: inv_collection_id.into(), - sled_config_id, + sled_config_id: sled_config_id.into(), id: dataset_config.id.into(), pool_id: dataset_config.name.pool().id().into(), kind: dataset_config.name.kind().into(), @@ -1632,7 +1637,7 @@ impl_enum_type!( #[diesel(table_name = inv_omicron_sled_config_zone)] pub struct InvOmicronSledConfigZone { pub inv_collection_id: DbTypedUuid, - pub sled_config_id: Uuid, + pub sled_config_id: DbTypedUuid, pub id: DbTypedUuid, pub zone_type: ZoneType, pub primary_service_ip: ipv6::Ipv6Addr, @@ -1659,7 +1664,7 @@ pub struct InvOmicronSledConfigZone { impl InvOmicronSledConfigZone { pub fn new( inv_collection_id: CollectionUuid, - sled_config_id: Uuid, + sled_config_id: OmicronSledConfigUuid, zone: &OmicronZoneConfig, ) -> Result { let (image_source, image_artifact_sha256) = match &zone.image_source { @@ -1677,7 +1682,7 @@ impl InvOmicronSledConfigZone { // Fill in the known fields that don't require inspecting // `zone.zone_type` inv_collection_id: inv_collection_id.into(), - sled_config_id, + sled_config_id: sled_config_id.into(), id: zone.id.into(), filesystem_pool: zone .filesystem_pool @@ -2027,7 +2032,7 @@ impl InvOmicronSledConfigZone { #[diesel(table_name = inv_omicron_sled_config_zone_nic)] pub struct InvOmicronSledConfigZoneNic { inv_collection_id: DbTypedUuid, - pub sled_config_id: Uuid, + pub sled_config_id: DbTypedUuid, pub id: Uuid, name: Name, ip: IpNetwork, @@ -2056,7 +2061,7 @@ impl From for OmicronZoneNic { impl InvOmicronSledConfigZoneNic { pub fn new( inv_collection_id: CollectionUuid, - sled_config_id: Uuid, + sled_config_id: OmicronSledConfigUuid, zone: &OmicronZoneConfig, ) -> Result, anyhow::Error> { let Some(nic) = zone.zone_type.service_vnic() else { @@ -2065,7 +2070,7 @@ impl InvOmicronSledConfigZoneNic { let nic = OmicronZoneNic::new(zone.id, nic)?; Ok(Some(Self { inv_collection_id: inv_collection_id.into(), - sled_config_id, + sled_config_id: sled_config_id.into(), id: nic.id, name: nic.name, ip: nic.ip, diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 1491658a416..4b84da350d0 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -25,7 +25,7 @@ use diesel::expression::SelectableHelper; use diesel::sql_types::Nullable; use futures::FutureExt; use futures::future::BoxFuture; -use id_map::IdMap; +use id_map::{IdMap, IdMappable}; use nexus_db_errors::ErrorHandler; use nexus_db_errors::public_error_from_diesel; use nexus_db_errors::public_error_from_diesel_lookup; @@ -87,6 +87,7 @@ use omicron_common::bail_unless; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronSledConfigUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; @@ -1080,12 +1081,15 @@ impl DataStore { ) .into_sql::(), ledgered_sled_config + .map(|id| id.into_untyped_uuid()) .into_sql::>(), last_reconciliation_sled_config + .map(|id| id.into_untyped_uuid()) .into_sql::>(), reconciler_status.reconciler_status_kind .into_sql::(), reconciler_status.reconciler_status_sled_config + .map(|id| id.into_untyped_uuid()) .into_sql::>(), reconciler_status.reconciler_status_timestamp .into_sql::>(), @@ -2392,10 +2396,10 @@ impl DataStore { // It does not contain the actual disk/dataset/zone configs; we'll start // with empty sets and build those up by reading additional tables // below. - let mut omicron_sled_configs: BTreeMap = { + let mut omicron_sled_configs = { use nexus_db_schema::schema::inv_omicron_sled_config::dsl; - let mut configs = BTreeMap::new(); + let mut configs = IdMap::new(); let mut paginator = Paginator::new(batch_size); while let Some(p) = paginator.next() { @@ -2412,10 +2416,10 @@ impl DataStore { public_error_from_diesel(e, ErrorHandler::Server) })?; paginator = p.found_batch(&batch, &|row| row.id); - configs.extend(batch.into_iter().map(|sled_config| { - ( - sled_config.id, - OmicronSledConfig { + for sled_config in batch { + configs.insert(OmicronSledConfigWithId { + id: sled_config.id.into(), + config: OmicronSledConfig { generation: sled_config.generation.into(), remove_mupdate_override: sled_config .remove_mupdate_override @@ -2424,8 +2428,8 @@ impl DataStore { datasets: IdMap::default(), zones: IdMap::default(), }, - ) - })) + }); + } } configs @@ -2510,8 +2514,8 @@ impl DataStore { }) }) .transpose()?; - let config = omicron_sled_configs - .get_mut(&z.sled_config_id) + let mut config_with_id = omicron_sled_configs + .get_mut(&z.sled_config_id.into()) .ok_or_else(|| { // This error means that we found a row in // inv_omicron_sled_config_zone with no associated record in @@ -2531,7 +2535,7 @@ impl DataStore { .map_err(|e| { Error::internal_error(&format!("{:#}", e.to_string())) })?; - config.zones.insert(zone); + config_with_id.config.zones.insert(zone); } bail_unless!( @@ -2561,17 +2565,19 @@ impl DataStore { paginator = p.found_batch(&batch, &|row| row.id); for row in batch { - let config = omicron_sled_configs - .get_mut(&row.sled_config_id) + let mut config_with_id = omicron_sled_configs + .get_mut(&row.sled_config_id.into()) .ok_or_else(|| { Error::internal_error(&format!( "dataset config {:?}: unknown config ID: {:?}", row.id, row.sled_config_id )) })?; - config.datasets.insert(row.try_into().map_err(|e| { - Error::internal_error(&format!("{e:#}")) - })?); + config_with_id.config.datasets.insert( + row.try_into().map_err(|e| { + Error::internal_error(&format!("{e:#}")) + })?, + ); } } } @@ -2597,15 +2603,15 @@ impl DataStore { paginator = p.found_batch(&batch, &|row| row.id); for row in batch { - let config = omicron_sled_configs - .get_mut(&row.sled_config_id) + let mut config_with_id = omicron_sled_configs + .get_mut(&row.sled_config_id.into()) .ok_or_else(|| { Error::internal_error(&format!( "disk config {:?}: unknown config ID: {:?}", row.id, row.sled_config_id )) })?; - config.disks.insert(row.into()); + config_with_id.config.disks.insert(row.into()); } } } @@ -2773,7 +2779,9 @@ impl DataStore { let ledgered_sled_config = s .ledgered_sled_config .map(|id| { - omicron_sled_configs.get(&id).cloned().ok_or_else(|| { + omicron_sled_configs.get(&id.into()).as_ref() + .map(|c| c.config.clone()) + .ok_or_else(|| { Error::internal_error( "missing sled config that we should have fetched", ) @@ -2783,21 +2791,26 @@ impl DataStore { let reconciler_status = s .reconciler_status .to_status(|config_id| { - omicron_sled_configs.get(config_id).cloned() + omicron_sled_configs + .get(config_id) + .as_ref() + .map(|c| c.config.clone()) }) .map_err(|e| Error::internal_error(&format!("{e:#}")))?; let last_reconciliation = s .last_reconciliation_sled_config .map(|id| { let last_reconciled_config = omicron_sled_configs - .get(&id) - .cloned() + .get(&id.into()) + .as_ref() .ok_or_else(|| { Error::internal_error( "missing sled config that we \ should have fetched", ) - })?; + })? + .config + .clone(); Ok::<_, Error>(ConfigReconcilerInventory { last_reconciled_config, external_disks: last_reconciliation_disk_results @@ -2888,10 +2901,24 @@ impl DataStore { } } +#[derive(Debug)] +struct OmicronSledConfigWithId { + id: OmicronSledConfigUuid, + config: OmicronSledConfig, +} + +impl IdMappable for OmicronSledConfigWithId { + type Id = OmicronSledConfigUuid; + + fn id(&self) -> Self::Id { + self.id + } +} + #[derive(Debug)] struct ConfigReconcilerFields { - ledgered_sled_config: Option, - last_reconciliation_sled_config: Option, + ledgered_sled_config: Option, + last_reconciliation_sled_config: Option, reconciler_status: InvConfigReconcilerStatus, } @@ -3022,7 +3049,8 @@ impl ConfigReconcilerRows { InvConfigReconcilerStatus { reconciler_status_kind: InvConfigReconcilerStatusKind::Running, - reconciler_status_sled_config, + reconciler_status_sled_config: + reconciler_status_sled_config.map(to_db_typed_uuid), reconciler_status_timestamp: Some(*started_at), reconciler_status_duration_secs: Some( running_for.as_secs_f64(), @@ -3057,8 +3085,8 @@ impl ConfigReconcilerRows { &mut self, collection_id: CollectionUuid, config: &OmicronSledConfig, - ) -> anyhow::Result { - let sled_config_id = Uuid::new_v4(); + ) -> anyhow::Result { + let sled_config_id = OmicronSledConfigUuid::new_v4(); self.sled_configs.push(InvOmicronSledConfig::new( collection_id, diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 1e343ecfc74..3f7806d836c 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -64,6 +64,11 @@ impl_typed_uuid_kind! { Instance => "instance", LoopbackAddress => "loopback_address", MupdateOverride => "mupdate_override", + // `OmicronSledConfig`s do not themselves contain IDs, but we generate IDs + // for them when they're serialized to the database during inventory + // collection. This ID type is therefore only used by nexus-db-model and + // nexus-db-queries. + OmicronSledConfig => "omicron_sled_config", OmicronZone => "service", PhysicalDisk => "physical_disk", Propolis => "propolis",