Skip to content

Commit

Permalink
Flesh out status report from reconfigurator rendezvous RPW (#7401)
Browse files Browse the repository at this point in the history
Example output from `omicron-dev run-all` (so no meaningful rows, but
just checking the field formatting and omdb deserialization):

```
task: "blueprint_rendezvous"
  configured period: every 5m
  currently executing: no
  last completed activation: iter 5, triggered by a dependent task completing
    started at 2025-01-24T21:07:40.740Z (222s ago) and ran for 68ms
    target blueprint:     72d199ef-d6bd-4895-8858-006f3ed212ad
    inventory collection: 7dfbf99c-3613-498c-a871-cfb9b2945abe
    debug_dataset rendezvous counts:
        num_inserted:           0
        num_already_exist:      0
        num_not_in_inventory:   0
        num_tombstoned:         0
        num_already_tombstoned: 0
    crucible_dataset rendezvous counts:
        num_inserted:         0
        num_already_exist:    0
        num_not_in_inventory: 0
```

Closes #7392.
  • Loading branch information
jgallagher authored Jan 28, 2025
1 parent 8f7a673 commit 2f1373e
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 14 deletions.
21 changes: 21 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,27 @@ fn print_task_blueprint_rendezvous(details: &serde_json::Value) {
" inventory collection: {}",
status.inventory_collection_id
);
println!(" debug_dataset rendezvous counts:");
println!(
" num_inserted: {}",
status.stats.debug_dataset.num_inserted
);
println!(
" num_already_exist: {}",
status.stats.debug_dataset.num_already_exist
);
println!(
" num_not_in_inventory: {}",
status.stats.debug_dataset.num_not_in_inventory
);
println!(
" num_tombstoned: {}",
status.stats.debug_dataset.num_tombstoned
);
println!(
" num_already_tombstoned: {}",
status.stats.debug_dataset.num_already_tombstoned
);
println!(" crucible_dataset rendezvous counts:");
println!(
" num_inserted: {}",
Expand Down
80 changes: 68 additions & 12 deletions nexus/reconfigurator/rendezvous/src/debug_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use nexus_db_queries::db::model::RendezvousDebugDataset;
use nexus_db_queries::db::DataStore;
use nexus_types::deployment::BlueprintDatasetConfig;
use nexus_types::deployment::BlueprintDatasetDisposition;
use nexus_types::internal_api::background::DebugDatasetsRendezvousStats;
use omicron_common::api::internal::shared::DatasetKind;
use omicron_uuid_kinds::BlueprintUuid;
use omicron_uuid_kinds::DatasetUuid;
Expand All @@ -23,7 +24,7 @@ pub(crate) async fn reconcile_debug_datasets(
blueprint_id: BlueprintUuid,
blueprint_datasets: impl Iterator<Item = &BlueprintDatasetConfig>,
inventory_datasets: &BTreeSet<DatasetUuid>,
) -> anyhow::Result<()> {
) -> anyhow::Result<DebugDatasetsRendezvousStats> {
// We expect basically all executions of this task to do nothing: we're
// activated periodically, and only do work when a dataset has been
// newly-added or newly-expunged.
Expand All @@ -39,27 +40,42 @@ pub(crate) async fn reconcile_debug_datasets(
.map(|d| (d.id(), d))
.collect::<BTreeMap<_, _>>();

let mut stats = DebugDatasetsRendezvousStats::default();

for dataset in blueprint_datasets.filter(|d| d.kind == DatasetKind::Debug) {
match dataset.disposition {
BlueprintDatasetDisposition::InService => {
// Only attempt to insert this dataset if it has shown up in
// inventory (required for correctness) and isn't already
// present in the db (performance optimization only). Inserting
// an already-present row is a no-op, so it's safe to skip.
if inventory_datasets.contains(&dataset.id)
&& !existing_db_datasets.contains_key(&dataset.id)
{
if existing_db_datasets.contains_key(&dataset.id) {
stats.num_already_exist += 1;
} else if !inventory_datasets.contains(&dataset.id) {
stats.num_not_in_inventory += 1;
} else {
let db_dataset = RendezvousDebugDataset::new(
dataset.id,
dataset.pool.id(),
blueprint_id,
);
datastore
let did_insert = datastore
.debug_dataset_insert_if_not_exists(opctx, db_dataset)
.await
.with_context(|| {
format!("failed to insert dataset {}", dataset.id)
})?;
})?
.is_some();

if did_insert {
stats.num_inserted += 1;
} else {
// This means we hit the TOCTOU race mentioned above:
// when we queried the DB this row didn't exist, but
// another Nexus must have beat us to actually inserting
// it.
stats.num_already_exist += 1;
}
}
}
BlueprintDatasetDisposition::Expunged => {
Expand All @@ -81,7 +97,9 @@ pub(crate) async fn reconcile_debug_datasets(
.get(&dataset.id)
.map(|d| d.is_tombstoned())
.unwrap_or(false);
if !already_tombstoned {
if already_tombstoned {
stats.num_already_tombstoned += 1;
} else {
if datastore
.debug_dataset_tombstone(
opctx,
Expand All @@ -96,17 +114,23 @@ pub(crate) async fn reconcile_debug_datasets(
)
})?
{
stats.num_tombstoned += 1;
info!(
opctx.log, "tombstoned expunged dataset";
"dataset_id" => %dataset.id,
);
} else {
// Similar TOCTOU race lost as above; this dataset was
// either already tombstoned by another racing Nexus, or
// has been hard deleted.
stats.num_already_tombstoned += 1;
}
}
}
}
}

Ok(())
Ok(stats)
}

#[cfg(test)]
Expand Down Expand Up @@ -213,15 +237,15 @@ mod tests {
))| {
let blueprint_id = BlueprintUuid::new_v4();

let datastore_datasets = runtime.block_on(async {
let (result_stats, datastore_datasets) = runtime.block_on(async {
let (blueprint_datasets, inventory_datasets) = proptest_do_prep(
opctx,
datastore,
blueprint_id,
&prep,
).await;

reconcile_debug_datasets(
let result_stats = reconcile_debug_datasets(
opctx,
datastore,
blueprint_id,
Expand All @@ -231,15 +255,19 @@ mod tests {
.await
.expect("reconciled debug dataset");

datastore
let datastore_datasets = datastore
.debug_dataset_list_all_batched(opctx)
.await
.unwrap()
.into_iter()
.map(|d| (d.id(), d))
.collect::<BTreeMap<_, _>>()
.collect::<BTreeMap<_, _>>();

(result_stats, datastore_datasets)
});

let mut expected_stats = DebugDatasetsRendezvousStats::default();

for (id, prep) in prep {
let id: DatasetUuid = u32_to_id(id);

Expand All @@ -252,6 +280,32 @@ mod tests {
prep.disposition == ArbitraryDisposition::InService;
let in_inventory = prep.in_inventory;

// Validate rendezvous output
match (in_db_before, in_service, in_inventory) {
// "Not in database and expunged" is consistent with "hard
// deleted", which we can't separate from "already
// tombstoned".
(false, false, _) => {
expected_stats.num_already_tombstoned += 1;
}
// "In database and expunged" should result in tombstoning.
(true, false, _) => {
expected_stats.num_tombstoned += 1;
}
// In service but already existed
(true, true, _) => {
expected_stats.num_already_exist += 1;
}
// In service, not in db yet, but not in inventory
(false, true, false) => {
expected_stats.num_not_in_inventory += 1;
}
// In service, not in db yet, present in inventory
(false, true, true) => {
expected_stats.num_inserted += 1;
}
}

// Validate database state
match (in_db_before, in_service, in_inventory) {
// Wasn't in DB, isn't in service: should still not be in db
Expand Down Expand Up @@ -298,6 +352,8 @@ mod tests {
}
}
}

assert_eq!(result_stats, expected_stats);
});

runtime.block_on(db.terminate());
Expand Down
4 changes: 2 additions & 2 deletions nexus/reconfigurator/rendezvous/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub async fn reconcile_blueprint_rendezvous_tables(
.flat_map(|sled| sled.datasets.iter().flat_map(|d| d.id))
.collect();

debug_dataset::reconcile_debug_datasets(
let debug_dataset = debug_dataset::reconcile_debug_datasets(
opctx,
datastore,
blueprint.id,
Expand All @@ -51,7 +51,7 @@ pub async fn reconcile_blueprint_rendezvous_tables(
)
.await?;

Ok(BlueprintRendezvousStats { crucible_dataset })
Ok(BlueprintRendezvousStats { debug_dataset, crucible_dataset })
}

#[cfg(test)]
Expand Down
51 changes: 51 additions & 0 deletions nexus/types/src/internal_api/background.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ pub struct BlueprintRendezvousStatus {

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct BlueprintRendezvousStats {
pub debug_dataset: DebugDatasetsRendezvousStats,
pub crucible_dataset: CrucibleDatasetsRendezvousStats,
}

Expand Down Expand Up @@ -283,3 +284,53 @@ impl slog::KV for CrucibleDatasetsRendezvousStats {
Ok(())
}
}

/// Counters summarizing one reconciliation pass of the Debug-dataset
/// rendezvous table against the current blueprint and inventory.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct DebugDatasetsRendezvousStats {
    /// Number of new Debug datasets recorded.
    ///
    /// This is a count of in-service Debug datasets that were also present
    /// in inventory and newly-inserted into `rendezvous_debug_dataset`.
    pub num_inserted: usize,
    /// Number of Debug datasets that would have been inserted, except
    /// records for them already existed.
    pub num_already_exist: usize,
    /// Number of Debug datasets that the current blueprint says are
    /// in-service, but we did not attempt to insert them because they're not
    /// present in the latest inventory collection.
    pub num_not_in_inventory: usize,
    /// Number of Debug datasets that we tombstoned based on their disposition
    /// in the current blueprint being expunged.
    pub num_tombstoned: usize,
    /// Number of Debug datasets that we would have tombstoned, except they were
    /// already tombstoned or deleted.
    pub num_already_tombstoned: usize,
}

impl slog::KV for DebugDatasetsRendezvousStats {
    /// Emit each counter as a structured log key/value pair.
    ///
    /// The exhaustive destructure below is deliberate: adding a field to
    /// `DebugDatasetsRendezvousStats` without logging it becomes a compile
    /// error rather than a silent omission.
    fn serialize(
        &self,
        _record: &slog::Record,
        serializer: &mut dyn slog::Serializer,
    ) -> slog::Result {
        let Self {
            num_inserted,
            num_already_exist,
            num_not_in_inventory,
            num_tombstoned,
            num_already_tombstoned,
        } = *self;
        // Emit in a fixed order so log output remains stable.
        let fields = [
            ("num_inserted", num_inserted),
            ("num_already_exist", num_already_exist),
            ("num_not_in_inventory", num_not_in_inventory),
            ("num_tombstoned", num_tombstoned),
            ("num_already_tombstoned", num_already_tombstoned),
        ];
        for (key, value) in fields {
            serializer.emit_usize(key.into(), value)?;
        }
        Ok(())
    }
}

0 comments on commit 2f1373e

Please sign in to comment.