diff --git a/dev-tools/omdb/src/bin/omdb/db/sitrep.rs b/dev-tools/omdb/src/bin/omdb/db/sitrep.rs index 439798d577b..4fe302465de 100644 --- a/dev-tools/omdb/src/bin/omdb/db/sitrep.rs +++ b/dev-tools/omdb/src/bin/omdb/db/sitrep.rs @@ -238,7 +238,7 @@ async fn cmd_db_sitrep_show( } }; - let fm::Sitrep { metadata } = sitrep; + let fm::Sitrep { metadata, cases } = sitrep; let fm::SitrepMetadata { id, creator_id, @@ -345,5 +345,12 @@ async fn cmd_db_sitrep_show( } } + if !cases.is_empty() { + println!("\n{:-<80}\n", "== CASES"); + for case in cases { + println!("{}", case.display_indented(4, Some(id))); + } + } + Ok(()) } diff --git a/ereport/types/src/lib.rs b/ereport/types/src/lib.rs index af77d9297b5..440a3dd78e5 100644 --- a/ereport/types/src/lib.rs +++ b/ereport/types/src/lib.rs @@ -32,6 +32,7 @@ pub struct Ereport { Serialize, Deserialize, JsonSchema, + Hash, )] #[repr(transparent)] #[serde(from = "u64", into = "u64")] @@ -102,7 +103,18 @@ impl TryFrom for Ena { } /// Unique identifier for an ereport. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + Serialize, + Deserialize, + PartialOrd, + Ord, + Hash, +)] pub struct EreportId { pub restart_id: EreporterRestartUuid, pub ena: Ena, diff --git a/nexus/db-model/src/fm.rs b/nexus/db-model/src/fm.rs index d9d7ac3c2dc..e14149ae170 100644 --- a/nexus/db-model/src/fm.rs +++ b/nexus/db-model/src/fm.rs @@ -19,6 +19,11 @@ use chrono::{DateTime, Utc}; use nexus_db_schema::schema::{fm_sitrep, fm_sitrep_history}; use omicron_uuid_kinds::{CollectionKind, OmicronZoneKind, SitrepKind}; +mod case; +pub use case::*; +mod diagnosis_engine; +pub use diagnosis_engine::*; + #[derive(Queryable, Insertable, Clone, Debug, Selectable)] #[diesel(table_name = fm_sitrep)] pub struct SitrepMetadata { diff --git a/nexus/db-model/src/fm/case.rs b/nexus/db-model/src/fm/case.rs new file mode 100644 index 00000000000..896d3db27fd --- /dev/null +++ b/nexus/db-model/src/fm/case.rs @@ -0,0 +1,135 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Fault management cases. + +use super::DiagnosisEngine; +use crate::DbTypedUuid; +use crate::ereport; +use nexus_db_schema::schema::{fm_case, fm_ereport_in_case}; +use nexus_types::fm; +use omicron_uuid_kinds::{ + CaseEreportKind, CaseKind, EreporterRestartKind, SitrepKind, SitrepUuid, +}; + +/// Metadata describing a fault management case. +/// +/// This corresponds to the fields in the `fm_case` table. +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = fm_case)] +pub struct CaseMetadata { + /// The ID of this case. + pub id: DbTypedUuid, + /// The ID of the sitrep in which the case has this state. + pub sitrep_id: DbTypedUuid, + /// The diagnosis engine which owns this case. 
+ pub de: DiagnosisEngine, + + /// The ID of the sitrep in which this case was created. + pub created_sitrep_id: DbTypedUuid, + /// If this case is closed, the ID of the sitrep in which it was closed. + /// + /// If this field is non-null, then the case has been closed. Closed cases + /// need not be copied forward into child sitreps that descend from the + /// sitrep in which the case was closed. + pub closed_sitrep_id: Option>, + + /// An optional, human-readable comment describing this case. + /// + /// Sitrep comments are intended for debugging purposes only; i.e., they are + /// visible to Oxide support via OMDB, but are not presented to the + /// operator. The contents of comment fields are not stable, and a DE may + /// emit a different comment string for an analogous determination across + /// different software versions. + pub comment: String, +} + +/// An association between an ereport and a case. +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = fm_ereport_in_case)] +pub struct CaseEreport { + /// The ID of this association. This is used primarily for pagination. + pub id: DbTypedUuid, + /// The restart ID of the reporter that produced this ereport. + pub restart_id: DbTypedUuid, + /// The ENA of the ereport within that reporter restart. + /// + /// As long as this `CaseEreport` entry exists, the corresponding entry in + /// the `ereport` table with this restart ID and ENA pair is assumed to also + /// exist. + pub ena: ereport::DbEna, + /// ID of the case. + /// + /// This corresponds to a record in `fm_case` with this case ID and sitrep ID. + pub case_id: DbTypedUuid, + /// ID of the current sitrep in which this association exists. + pub sitrep_id: DbTypedUuid, + /// ID of the first sitrep in which this association was added. + /// + /// Since all relevant data for open cases is copied forward into new + /// sitreps, this field exists primarily for debugging purposes. 
There is + /// nothing that the sitrep in which an ereport was first assigned to a case + /// can tell you which the current sitrep cannot. + pub assigned_sitrep_id: DbTypedUuid, + /// An optional, human-readable comment added by the diagnosis engine to + /// explain why it felt that this ereport is related to this case. + /// + /// Sitrep comments are intended for debugging purposes only; i.e., they are + /// visible to Oxide support via OMDB, but are not presented to the + /// operator. The contents of comment fields are not stable, and a DE may + /// emit a different comment string for an analogous determination across + /// different software versions. + pub comment: String, +} + +/// The complete state of a case in a particular sitrep, consisting of the +/// [`CaseMetadata`] record and any other records belonging to the case. +#[derive(Clone, Debug)] +pub struct Case { + pub metadata: CaseMetadata, + pub ereports: Vec, +} + +impl Case { + pub fn from_sitrep(sitrep_id: SitrepUuid, case: fm::Case) -> Self { + let sitrep_id = sitrep_id.into(); + let case_id = case.id.into(); + let ereports = case + .ereports + .into_iter() + .map( + |fm::case::CaseEreport { + id, + ereport, + assigned_sitrep_id, + comment, + }| { + let restart_id = ereport.id().restart_id.into(); + let ena = ereport.id().ena.into(); + CaseEreport { + id: id.into(), + case_id, + restart_id, + ena, + comment, + sitrep_id, + assigned_sitrep_id: assigned_sitrep_id.into(), + } + }, + ) + .collect(); + + Self { + metadata: CaseMetadata { + id: case_id, + sitrep_id, + de: case.de.into(), + created_sitrep_id: case.created_sitrep_id.into(), + closed_sitrep_id: case.closed_sitrep_id.map(Into::into), + comment: case.comment, + }, + ereports, + } + } +} diff --git a/nexus/db-model/src/fm/diagnosis_engine.rs b/nexus/db-model/src/fm/diagnosis_engine.rs new file mode 100644 index 00000000000..7d354142bbb --- /dev/null +++ b/nexus/db-model/src/fm/diagnosis_engine.rs @@ -0,0 +1,50 @@ +// This Source Code Form is 
subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::impl_enum_type; +use nexus_types::fm; +use serde::{Deserialize, Serialize}; +use std::fmt; + +impl_enum_type!( + DiagnosisEngineEnum: + + #[derive( + Copy, + Clone, + Debug, + PartialEq, + Serialize, + Deserialize, + AsExpression, + FromSqlRow, + )] + #[serde(rename_all = "snake_case")] + pub enum DiagnosisEngine; + + PowerShelf => b"power_shelf" + +); + +impl From for fm::DiagnosisEngineKind { + fn from(de: DiagnosisEngine) -> Self { + match de { + DiagnosisEngine::PowerShelf => fm::DiagnosisEngineKind::PowerShelf, + } + } +} + +impl From for DiagnosisEngine { + fn from(fm_de: fm::DiagnosisEngineKind) -> Self { + match fm_de { + fm::DiagnosisEngineKind::PowerShelf => DiagnosisEngine::PowerShelf, + } + } +} + +impl fmt::Display for DiagnosisEngine { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fm::DiagnosisEngineKind::from(*self).fmt(f) + } +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 95e14165dd6..692cf62b839 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -42,7 +42,7 @@ mod downstairs; pub mod ereport; mod ereporter_type; mod external_ip; -mod fm; +pub mod fm; mod generation; mod identity_provider; mod image; @@ -191,7 +191,7 @@ pub use downstairs::*; pub use ereport::Ereport; pub use ereporter_type::*; pub use external_ip::*; -pub use fm::*; +pub use fm::{SitrepMetadata, SitrepVersion}; pub use generation::*; pub use identity_provider::*; pub use image::*; diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 6b91f91804d..a9f9ef99b8e 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. 
Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(212, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(213, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(213, "fm-cases"), KnownVersion::new(212, "local-storage-disk-type"), KnownVersion::new(211, "blueprint-sled-config-subnet"), KnownVersion::new(210, "one-big-ereport-table"), diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index 79fbe44a828..4f995447c35 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -98,6 +98,14 @@ impl DataStore { ) -> LookupResult { opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; let conn = self.pool_connection_authorized(opctx).await?; + self.ereport_fetch_on_conn(&conn, id).await + } + + pub(crate) async fn ereport_fetch_on_conn( + &self, + conn: &async_bb8_diesel::Connection, + id: fm::EreportId, + ) -> LookupResult { let restart_id = id.restart_id.into_untyped_uuid(); let ena = DbEna::from(id.ena); @@ -106,7 +114,7 @@ impl DataStore { .filter(dsl::ena.eq(ena)) .filter(dsl::time_deleted.is_null()) .select(Ereport::as_select()) - .first_async(&*conn) + .first_async(conn) .await .optional() .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? 
diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index f86351e3081..48065f81bbf 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -12,8 +12,11 @@ use super::DataStore; use crate::authz; use crate::context::OpContext; use crate::db::datastore::RunnableQuery; +use crate::db::datastore::SQL_BATCH_SIZE; use crate::db::model; +use crate::db::model::DbTypedUuid; use crate::db::model::SqlU32; +use crate::db::pagination::Paginator; use crate::db::pagination::paginated; use crate::db::raw_query_builder::QueryBuilder; use crate::db::raw_query_builder::TypedSqlQuery; @@ -26,6 +29,9 @@ use dropshot::PaginationOrder; use nexus_db_errors::ErrorHandler; use nexus_db_errors::public_error_from_diesel; use nexus_db_lookup::DbConnection; +use nexus_db_schema::schema::ereport::dsl as ereport_dsl; +use nexus_db_schema::schema::fm_case::dsl as case_dsl; +use nexus_db_schema::schema::fm_ereport_in_case::dsl as case_ereport_dsl; use nexus_db_schema::schema::fm_sitrep::dsl as sitrep_dsl; use nexus_db_schema::schema::fm_sitrep_history::dsl as history_dsl; use nexus_types::fm; @@ -33,8 +39,13 @@ use nexus_types::fm::Sitrep; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; +use omicron_uuid_kinds::CaseEreportKind; +use omicron_uuid_kinds::CaseKind; +use omicron_uuid_kinds::CaseUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SitrepUuid; +use std::collections::HashMap; +use std::sync::Arc; use uuid::Uuid; impl DataStore { @@ -120,7 +131,8 @@ impl DataStore { Ok(Some((version, sitrep))) } - /// Reads the entire content of the sitrep with the provided ID, if one exists. + /// Reads the entire content of the sitrep with the provided ID, if one + /// exists. 
pub async fn fm_sitrep_read( &self, opctx: &OpContext, @@ -139,10 +151,179 @@ impl DataStore { let metadata = self.fm_sitrep_metadata_read_on_conn(id, &conn).await?.into(); - // TODO(eliza): this is where we would read all the other sitrep data, - // if there was any. + // Fetch all ereports assigned to cases in this sitrep. We do this by + // querying the `fm_ereport_in_case` table for all entries with this + // sitrep ID, paginated by the ereport assignment's UUID. This query is + // `INNER JOIN`ed with the `fm_ereport` table to fetch the ereport's + // data for that assignment. + // + // We use the results of this query to populate a map of case UUIDs to + // the map of ereports assigned to that case. Ereports are de-duplicated + // using an additional map of `Arc`ed ereports, to reduce the in-memory + // size of the sitrep when an ereport is assigned to multiple cases. the + // JOINed query *will* potentially load the same ereport multiple times + // in that case, but this is still probably much more efficient than + // issuing a bunch of smaller queries to load ereports individually. + let mut case_ereports = + { + // TODO(eliza): as a potential optimization, since ereport + // records are immutable, we might consider hanging onto this + // map of all ereports in the `Sitrep` structure. Then, when we + // load the next sitrep, we could first check if the ereports in + // that sitrep are contained in the map before loading them + // again. That would require changing the rest of this code to + // not `JOIN` with the ereports table here, and instead populate + // a list of additional ereports we need to load, and issue a + // separate query for that. But, it's worth considering maybe if + // this becomes a bottleneck... 
+ let mut ereports = iddqd::IdOrdMap::>::new(); + let mut map = HashMap::>::new(); + + let mut paginator = + Paginator::new(SQL_BATCH_SIZE, PaginationOrder::Descending); + while let Some(p) = paginator.next() { + let batch = DataStore::fm_sitrep_read_ereports_query( + id, + &p.current_pagparams(), + ) + .load_async(conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context( + "failed to load case ereport assignments", + ) + })?; + + paginator = + p.found_batch(&batch, &|(assignment, _)| assignment.id); + for (assignment, ereport) in batch { + let ereport_id = fm::EreportId { + restart_id: ereport.restart_id.into(), + ena: ereport.ena.into(), + }; + let ereport = match ereports.entry(&ereport_id) { + iddqd::id_ord_map::Entry::Occupied(entry) => { + entry.get().clone() + } + iddqd::id_ord_map::Entry::Vacant(entry) => { + let ereport = + Arc::new(fm::Ereport::try_from(ereport)?); + entry.insert(ereport.clone()); + ereport + } + }; + let id = assignment.id.into(); + let case_id = assignment.case_id.into(); + map.entry(case_id).or_default().insert_unique( + fm::case::CaseEreport { + id, + ereport, + assigned_sitrep_id: assignment + .assigned_sitrep_id + .into(), + comment: assignment.comment, + }, + ).map_err(|_| Error::InternalError { internal_message: + format!( + "encountered multiple case ereports for case \ + {case_id} with the same UUID {id}. this should \ + really not be possible, as the assignment UUID \ + is a primary key!", + )})?; + } + } - Ok(Sitrep { metadata }) + map + }; + // Next, load the case metadata entries and marry them to the sets of + // ereports assigned to those cases that we loaded in the previous step. 
+ let cases = { + let mut cases = iddqd::IdOrdMap::new(); + let mut paginator = + Paginator::new(SQL_BATCH_SIZE, PaginationOrder::Descending); + while let Some(p) = paginator.next() { + let batch = self + .fm_sitrep_cases_list_on_conn( + id, + &p.current_pagparams(), + &conn, + ) + .await + .map_err(|e| { + e.internal_context("failed to list sitrep cases") + })?; + paginator = p.found_batch(&batch, &|case| case.id); + cases.extend(batch.into_iter().map(|case| { + let model::fm::CaseMetadata { + id, + sitrep_id: _, + created_sitrep_id, + closed_sitrep_id, + comment, + de, + } = case; + let id = id.into(); + + // Take all the case ereport assignments we've collected for this case. + let ereports = case_ereports + .remove(&id) + // If there's no entry in the map of case ereport + // assignments for this case, then that just means that the + // case has no ereports assigned to it, so insert an empty + // map here. + .unwrap_or_default(); + fm::Case { + id, + created_sitrep_id: created_sitrep_id.into(), + closed_sitrep_id: closed_sitrep_id.map(Into::into), + de: de.into(), + comment, + ereports, + } + })); + } + + cases + }; + + Ok(Sitrep { metadata, cases }) + } + + async fn fm_sitrep_cases_list_on_conn( + &self, + sitrep_id: SitrepUuid, + pagparams: &DataPageParams<'_, DbTypedUuid>, + conn: &async_bb8_diesel::Connection, + ) -> ListResultVec { + paginated(case_dsl::fm_case, case_dsl::id, &pagparams) + .filter(case_dsl::sitrep_id.eq(sitrep_id.into_untyped_uuid())) + .select(model::fm::CaseMetadata::as_select()) + .load_async::(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + fn fm_sitrep_read_ereports_query( + sitrep_id: SitrepUuid, + pagparams: &DataPageParams<'_, DbTypedUuid>, + ) -> impl RunnableQuery<(model::fm::CaseEreport, model::Ereport)> + use<> + { + paginated( + case_ereport_dsl::fm_ereport_in_case, + case_ereport_dsl::id, + pagparams, + ) + .filter(case_ereport_dsl::sitrep_id.eq(sitrep_id.into_untyped_uuid())) + 
.inner_join( + ereport_dsl::ereport.on(ereport_dsl::restart_id + .eq(case_ereport_dsl::restart_id) + .and(ereport_dsl::ena.eq(case_ereport_dsl::ena))), + ) + .select(( + model::fm::CaseEreport::as_select(), + model::Ereport::as_select(), + )) } /// Insert the provided [`Sitrep`] into the database, and attempt to mark it @@ -171,16 +352,27 @@ impl DataStore { pub async fn fm_sitrep_insert( &self, opctx: &OpContext, - sitrep: &Sitrep, + sitrep: Sitrep, ) -> Result<(), InsertSitrepError> { let conn = self.pool_connection_authorized(opctx).await?; // TODO(eliza): there should probably be an authz object for the fm sitrep? opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let sitrep_id = sitrep.id(); + // Create the sitrep metadata record. + // + // NOTE: we must insert this record before anything else, because it's + // how orphaned sitreps are found when performing garbage collection. + // Were we to first insert some other records and insert the metadata + // record *last*, we could die when we have inserted some sitrep data + // but have yet to create the metadata record. If this occurs, those + // records could not be easily found by the garbage collection task. + // Those (unused) records would then be permanently leaked without + // manual human intervention to delete them. diesel::insert_into(sitrep_dsl::fm_sitrep) - .values(model::SitrepMetadata::from(sitrep.metadata.clone())) + .values(model::SitrepMetadata::from(sitrep.metadata)) .execute_async(&*conn) .await .map_err(|e| { @@ -188,10 +380,45 @@ impl DataStore { .internal_context("failed to insert sitrep metadata record") })?; - // TODO(eliza): other sitrep records would be inserted here... + // Create case records. + let mut cases = Vec::with_capacity(sitrep.cases.len()); + for case in sitrep.cases { + // TODO(eliza): some of this could be done in parallel using a + // `ParallelTaskSet`, if the time it takes to insert a sitrep were + // to become important? 
+ let model::fm::Case { metadata, ereports } = + model::fm::Case::from_sitrep(sitrep_id, case); + + if !ereports.is_empty() { + diesel::insert_into(case_ereport_dsl::fm_ereport_in_case) + .values(ereports) + .execute_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context(format!( + "failed to insert ereport records for case {}", + metadata.id + )) + })?; + } + + cases.push(metadata); + } + + if !cases.is_empty() { + diesel::insert_into(case_dsl::fm_case) + .values(cases) + .execute_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context("failed to insert case records") + })?; + } // Now, try to make the sitrep current. - let query = Self::insert_sitrep_version_query(sitrep.id()); + let query = Self::insert_sitrep_version_query(sitrep_id); query .execute_async(&*conn) .await @@ -202,7 +429,7 @@ impl DataStore { ) if info.message() == Self::PARENT_NOT_CURRENT_ERROR_MESSAGE => { - InsertSitrepError::ParentNotCurrent(sitrep.id()) + InsertSitrepError::ParentNotCurrent(sitrep_id) } err => { let err = @@ -530,9 +757,28 @@ impl DataStore { .map(|id| id.into_untyped_uuid()) .collect::>(); - // TODO(eliza): when other tables are added to store data that is part - // of the sitrep, we'll need to delete any records with matching IDs in - // those tables, too! + // Delete case ereport assignments + let case_ereports_deleted = diesel::delete( + case_ereport_dsl::fm_ereport_in_case + .filter(case_ereport_dsl::sitrep_id.eq_any(ids.clone())), + ) + .execute_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context("failed to delete case ereport assignments") + })?; + + // Delete case metadata records. 
+ let cases_deleted = diesel::delete( + case_dsl::fm_case.filter(case_dsl::sitrep_id.eq_any(ids.clone())), + ) + .execute_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context("failed to delete case metadata") + })?; // Delete the sitrep metadata entries *last*. This is necessary because // the rest of the delete operation is unsynchronized, and it is @@ -541,10 +787,26 @@ impl DataStore { // the one that is used to determine whether a sitrep "exists" so that // the sitrep GC task can determine if it needs to be deleted, so don't // touch it until all the other records are gone. - diesel::delete(sitrep_dsl::fm_sitrep.filter(sitrep_dsl::id.eq_any(ids))) - .execute_async(&*conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + let sitreps_deleted = diesel::delete( + sitrep_dsl::fm_sitrep.filter(sitrep_dsl::id.eq_any(ids.clone())), + ) + .execute_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context("failed to delete sitrep metadata") + })?; + + slog::debug!( + &opctx.log, + "deleted {sitreps_deleted} of {} sitreps sitreps", ids.len(); + "ids" => ?ids, + "sitreps_deleted" => sitreps_deleted, + "cases_deleted" => cases_deleted, + "case_ereports_deleted" => case_ereports_deleted, + ); + + Ok(sitreps_deleted) } pub async fn fm_sitrep_version_list( @@ -734,6 +996,39 @@ mod tests { logctx.cleanup_successful(); } + #[tokio::test] + async fn explain_sitrep_read_ereports_query() { + let logctx = dev::test_setup_log("explain_sitrep_read_ereports_query"); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let pool = db.pool(); + let conn = pool.claim().await.unwrap(); + + let pagparams = DataPageParams { + marker: None, + limit: std::num::NonZeroU32::new(420).unwrap(), + direction: dropshot::PaginationOrder::Descending, + }; + let query = DataStore::fm_sitrep_read_ereports_query( + SitrepUuid::nil(), + &pagparams, + ); + let 
explanation = query + .explain_async(&conn) + .await + .expect("Failed to explain query - is it valid SQL?"); + + eprintln!("{explanation}"); + + assert!( + !explanation.contains("FULL SCAN"), + "Found an unexpected FULL SCAN: {}", + explanation + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_insert_sitrep_without_parent() { // Setup @@ -755,9 +1050,10 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; - datastore.fm_sitrep_insert(&opctx, &sitrep).await.unwrap(); + datastore.fm_sitrep_insert(&opctx, sitrep.clone()).await.unwrap(); let current = datastore .fm_sitrep_read_current(&opctx) @@ -775,8 +1071,10 @@ mod tests { assert_eq!(sitrep.metadata.comment, current_sitrep.metadata.comment); // Trying to insert the same sitrep again should fail. - let err = - datastore.fm_sitrep_insert(&opctx, &sitrep).await.unwrap_err(); + let err = datastore + .fm_sitrep_insert(&opctx, sitrep.clone()) + .await + .unwrap_err(); assert!(err.to_string().contains("duplicate key")); // Clean up. 
@@ -801,8 +1099,9 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; - datastore.fm_sitrep_insert(&opctx, &sitrep1).await.unwrap(); + datastore.fm_sitrep_insert(&opctx, sitrep1.clone()).await.unwrap(); // Create a second sitrep with the first as parent let sitrep2 = nexus_types::fm::Sitrep { @@ -814,8 +1113,9 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(sitrep1.id()), }, + cases: Default::default(), }; - datastore.fm_sitrep_insert(&opctx, &sitrep2).await.expect( + datastore.fm_sitrep_insert(&opctx, sitrep2.clone()).await.expect( "inserting a sitrep whose parent is current should succeed", ); @@ -854,8 +1154,9 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; - datastore.fm_sitrep_insert(&opctx, &sitrep1).await.unwrap(); + datastore.fm_sitrep_insert(&opctx, sitrep1.clone()).await.unwrap(); // Try to insert a sitrep with a non-existent parent ID let nonexistent_id = SitrepUuid::new_v4(); @@ -868,9 +1169,10 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(nonexistent_id), }, + cases: Default::default(), }; - let result = datastore.fm_sitrep_insert(&opctx, &sitrep2).await; + let result = datastore.fm_sitrep_insert(&opctx, sitrep2).await; // Should fail with ParentNotCurrent error match result { @@ -902,8 +1204,9 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; - datastore.fm_sitrep_insert(&opctx, &sitrep1).await.unwrap(); + datastore.fm_sitrep_insert(&opctx, sitrep1.clone()).await.unwrap(); // Create a second sitrep with the first as parent let sitrep2 = nexus_types::fm::Sitrep { @@ -915,8 +1218,9 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(sitrep1.id()), }, + cases: Default::default(), }; - datastore.fm_sitrep_insert(&opctx, &sitrep2).await.unwrap(); + datastore.fm_sitrep_insert(&opctx, sitrep2.clone()).await.unwrap(); // Try to create a third sitrep with sitrep1 
(outdated) as parent. // This should fail, as sitrep2 is now the current sitrep. @@ -929,8 +1233,9 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(sitrep1.id()), }, + cases: Default::default(), }; - let result = datastore.fm_sitrep_insert(&opctx, &sitrep3).await; + let result = datastore.fm_sitrep_insert(&opctx, sitrep3.clone()).await; // Should fail with ParentNotCurrent error match result { @@ -969,9 +1274,10 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; datastore - .fm_sitrep_insert(&opctx, &sitrep1) + .fm_sitrep_insert(&opctx, sitrep1.clone()) .await .expect("inserting initial sitrep should succeed"); @@ -1009,9 +1315,10 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(sitrep1.metadata.id), }, + cases: Default::default(), }; datastore - .fm_sitrep_insert(&opctx, &sitrep2) + .fm_sitrep_insert(&opctx, sitrep2.clone()) .await .expect("inserting child sitrep should succeed"); @@ -1042,7 +1349,7 @@ mod tests { ) -> Result, Error> { let mut listed_orphans = BTreeSet::new(); let mut paginator = Paginator::new( - crate::db::datastore::SQL_BATCH_SIZE, + SQL_BATCH_SIZE, dropshot::PaginationOrder::Descending, ); while let Some(p) = paginator.next() { @@ -1072,8 +1379,9 @@ mod tests { time_created: Utc::now(), parent_sitrep_id, }, + cases: Default::default(), }; - match datastore.fm_sitrep_insert(&opctx, &sitrep).await { + match datastore.fm_sitrep_insert(&opctx, sitrep).await { Ok(_) => { panic!("inserting sitrep v{v} orphan {i} should not succeed") } diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 2d74db5ab9c..2f71eb7ac50 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -39,6 +39,7 @@ define_enums! 
{ ClickhouseModeEnum => "clickhouse_mode", DatasetKindEnum => "dataset_kind", DbMetadataNexusStateEnum => "db_metadata_nexus_state", + DiagnosisEngineEnum => "diagnosis_engine", DiskTypeEnum => "disk_type", DnsGroupEnum => "dns_group", DownstairsClientStopRequestReasonEnum => "downstairs_client_stop_request_reason_type", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 8be6bb768c2..0a4f2988bce 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -2954,3 +2954,32 @@ allow_tables_to_appear_in_same_query!( rendezvous_local_storage_dataset, local_storage_dataset_allocation ); + +table! { + fm_case (sitrep_id, id) { + id -> Uuid, + sitrep_id -> Uuid, + de -> crate::enums::DiagnosisEngineEnum, + + created_sitrep_id -> Uuid, + closed_sitrep_id -> Nullable, + + comment -> Text, + } +} + +table! { + fm_ereport_in_case (sitrep_id, id) { + id -> Uuid, + restart_id -> Uuid, + ena -> Int8, + case_id -> Uuid, + sitrep_id -> Uuid, + assigned_sitrep_id -> Uuid, + + comment -> Text, + } +} + +allow_tables_to_appear_in_same_query!(fm_ereport_in_case, ereport); +allow_tables_to_appear_in_same_query!(fm_sitrep, fm_case); diff --git a/nexus/src/app/background/tasks/fm_sitrep_gc.rs b/nexus/src/app/background/tasks/fm_sitrep_gc.rs index 92214faef4b..372ae80c6a7 100644 --- a/nexus/src/app/background/tasks/fm_sitrep_gc.rs +++ b/nexus/src/app/background/tasks/fm_sitrep_gc.rs @@ -152,9 +152,10 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; datastore - .fm_sitrep_insert(&opctx, &sitrep1) + .fm_sitrep_insert(&opctx, sitrep1.clone()) .await .expect("inserting initial sitrep should succeed"); @@ -174,9 +175,10 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(sitrep1.metadata.id), }, + cases: Default::default(), }; datastore - .fm_sitrep_insert(&opctx, &sitrep2) + .fm_sitrep_insert(&opctx, sitrep2.clone()) .await .expect("inserting child sitrep should succeed"); @@ 
-264,9 +266,12 @@ mod tests { comment: format!("test sitrep v{i}; orphan {i}"), time_created: Utc::now(), parent_sitrep_id, + // TODO(eliza): we should populate cases and assert they get + // cleaned up... }, + cases: Default::default(), }; - match datastore.fm_sitrep_insert(&opctx, &sitrep).await { + match datastore.fm_sitrep_insert(&opctx, sitrep).await { Ok(_) => { panic!("inserting sitrep v{v} orphan {i} should not succeed") } diff --git a/nexus/src/app/background/tasks/fm_sitrep_load.rs b/nexus/src/app/background/tasks/fm_sitrep_load.rs index 0a2c52f95b1..0dfaf2f7b0f 100644 --- a/nexus/src/app/background/tasks/fm_sitrep_load.rs +++ b/nexus/src/app/background/tasks/fm_sitrep_load.rs @@ -23,7 +23,7 @@ pub struct SitrepLoader { tx: watch::Sender, } -type CurrentSitrep = Option>; +pub type CurrentSitrep = Option>; impl BackgroundTask for SitrepLoader { fn activate<'a>( @@ -224,9 +224,10 @@ mod test { comment: "test sitrep 1".to_string(), time_created: Utc::now(), }, + cases: Default::default(), }; datastore - .fm_sitrep_insert(&opctx, &sitrep1) + .fm_sitrep_insert(&opctx, sitrep1.clone()) .await .expect("sitrep should be inserted successfully"); @@ -288,9 +289,10 @@ mod test { comment: "test sitrep 2".to_string(), time_created: Utc::now(), }, + cases: Default::default(), }; datastore - .fm_sitrep_insert(&opctx, &sitrep2) + .fm_sitrep_insert(&opctx, sitrep2.clone()) .await .expect("sitrep2 should be inserted successfully"); diff --git a/nexus/types/src/fm.rs b/nexus/types/src/fm.rs index 3f90379388c..eb8cfaaee3b 100644 --- a/nexus/types/src/fm.rs +++ b/nexus/types/src/fm.rs @@ -8,11 +8,14 @@ //! structure containing fault management state. 
 pub mod ereport;
-pub use ereport::Ereport;
+pub use ereport::{Ereport, EreportId};
+
+pub mod case;
+pub use case::Case;

 use chrono::{DateTime, Utc};
+use iddqd::IdOrdMap;
 use omicron_uuid_kinds::{CollectionUuid, OmicronZoneUuid, SitrepUuid};
-use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};

 /// A fault management situation report, or _sitrep_.
@@ -30,12 +33,12 @@ use serde::{Deserialize, Serialize};
 /// The sitrep, how it is represented in the database, and how the fault
 /// management subsystem creates and interacts with sitreps, is described in
 /// detail in [RFD 603](https://rfd.shared.oxide.computer/rfd/0603).
-#[derive(Clone, Debug, Eq, PartialEq, JsonSchema, Deserialize, Serialize)]
+#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
 pub struct Sitrep {
     /// Metadata describing this sitrep, when it was created, its parent sitrep
     /// ID, and which Nexus produced it.
     pub metadata: SitrepMetadata,
-    // TODO(eliza): draw the rest of the sitrep
+    pub cases: IdOrdMap<Case>,
 }

 impl Sitrep {
@@ -46,12 +49,20 @@ impl Sitrep {
     pub fn parent_id(&self) -> Option<SitrepUuid> {
         self.metadata.parent_sitrep_id
     }
+
+    /// Iterate over all the open cases in this sitrep.
+    ///
+    /// All cases returned by this iterator will be copied forward into any
+    /// child sitreps that descend from this one.
+    pub fn open_cases(&self) -> impl Iterator<Item = &Case> + '_ {
+        self.cases.iter().filter(|c| c.is_open())
+    }
 }

 /// Metadata describing a sitrep.
 ///
 /// This corresponds to the records stored in the `fm_sitrep` database table.
-#[derive(Clone, Debug, Eq, PartialEq, JsonSchema, Deserialize, Serialize)]
+#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
 pub struct SitrepMetadata {
     /// The ID of this sitrep.
     pub id: SitrepUuid,
@@ -91,9 +102,26 @@ pub struct SitrepMetadata {
 }

 /// An entry in the sitrep version history.
-#[derive(Clone, Debug, Eq, PartialEq, JsonSchema, Deserialize, Serialize)]
+#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
 pub struct SitrepVersion {
     pub id: SitrepUuid,
     pub version: u32,
     pub time_made_current: DateTime<Utc>,
 }
+
+#[derive(
+    Copy,
+    Clone,
+    Debug,
+    PartialEq,
+    Eq,
+    Hash,
+    serde::Serialize,
+    serde::Deserialize,
+    strum::Display,
+)]
+#[serde(rename_all = "snake_case")]
+#[strum(serialize_all = "snake_case")]
+pub enum DiagnosisEngineKind {
+    PowerShelf,
+}
diff --git a/nexus/types/src/fm/case.rs b/nexus/types/src/fm/case.rs
new file mode 100644
index 00000000000..bde7ac3f1d9
--- /dev/null
+++ b/nexus/types/src/fm/case.rs
@@ -0,0 +1,303 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::fm::DiagnosisEngineKind;
+use crate::fm::Ereport;
+use iddqd::{IdOrdItem, IdOrdMap};
+use omicron_uuid_kinds::{CaseEreportUuid, CaseUuid, SitrepUuid};
+use serde::{Deserialize, Serialize};
+use std::fmt;
+use std::sync::Arc;
+
+#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
+pub struct Case {
+    pub id: CaseUuid,
+    pub created_sitrep_id: SitrepUuid,
+    pub closed_sitrep_id: Option<SitrepUuid>,
+
+    pub de: DiagnosisEngineKind,
+
+    pub ereports: IdOrdMap<CaseEreport>,
+
+    pub comment: String,
+}
+
+impl Case {
+    pub fn is_open(&self) -> bool {
+        self.closed_sitrep_id.is_none()
+    }
+
+    pub fn display_indented(
+        &self,
+        indent: usize,
+        sitrep_id: Option<SitrepUuid>,
+    ) -> impl fmt::Display + '_ {
+        DisplayCase { case: self, indent, sitrep_id }
+    }
+}
+
+impl fmt::Display for Case {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.display_indented(0, None).fmt(f)
+    }
+}
+
+impl IdOrdItem for Case {
+    type Key<'a> = &'a CaseUuid;
+    fn key(&self) -> Self::Key<'_> {
+        &self.id
+    }
+
+    iddqd::id_upcast!();
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
+pub struct 
CaseEreport {
+    pub id: CaseEreportUuid,
+    pub ereport: Arc<Ereport>,
+    pub assigned_sitrep_id: SitrepUuid,
+    pub comment: String,
+}
+
+impl IdOrdItem for CaseEreport {
+    type Key<'a> = <Arc<Ereport> as IdOrdItem>::Key<'a>;
+    fn key(&self) -> Self::Key<'_> {
+        self.ereport.key()
+    }
+
+    iddqd::id_upcast!();
+}
+
+struct DisplayCase<'a> {
+    case: &'a Case,
+    indent: usize,
+    sitrep_id: Option<SitrepUuid>,
+}
+
+impl fmt::Display for DisplayCase<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        const BULLET: &str = "* ";
+        const fn const_max_len(strs: &[&str]) -> usize {
+            let mut max = 0;
+            let mut i = 0;
+            while i < strs.len() {
+                let len = strs[i].len();
+                if len > max {
+                    max = len;
+                }
+                i += 1;
+            }
+            max
+        }
+
+        let &Self {
+            case:
+                Case {
+                    id,
+                    created_sitrep_id,
+                    closed_sitrep_id,
+                    de,
+                    ereports,
+                    comment,
+                },
+            indent,
+            sitrep_id,
+        } = self;
+
+        let this_sitrep = move |s| {
+            if Some(s) == sitrep_id { " <-- this sitrep" } else { "" }
+        };
+
+        writeln!(
+            f,
+            "{:>indent$}case {id}",
+            if indent > 0 { BULLET } else { "" }
+        )?;
+        writeln!(
+            f,
+            "{:>indent$}=========================================",
+            ""
+        )?;
+
+        const DE: &str = "diagnosis engine:";
+        const OPENED_IN: &str = "opened in sitrep:";
+        const CLOSED_IN: &str = "closed in sitrep:";
+        const WIDTH: usize = const_max_len(&[DE, OPENED_IN, CLOSED_IN]);
+        writeln!(f, "{:>indent$}{DE:indent$}{OPENED_IN:indent$}{CLOSED_IN:indent$}comment: {comment}", "")?;
+
+        if !ereports.is_empty() {
+            writeln!(f, "\n{:>indent$}ereports:", "")?;
+            writeln!(f, "{:>indent$}---------", "")?;
+
+            let indent = indent + 2;
+            for CaseEreport { id, ereport, assigned_sitrep_id, comment } in
+                ereports
+            {
+                const CLASS: &str = "class:";
+                const REPORTED_BY: &str = "reported by:";
+                const ADDED_IN: &str = "added in:";
+                const ASSIGNMENT_ID: &str = "assignment ID:";
+                const COMMENT: &str = "comment:";
+
+                const WIDTH: usize = const_max_len(&[
+                    CLASS,
+                    REPORTED_BY,
+                    ADDED_IN,
+                    ASSIGNMENT_ID,
+                    COMMENT,
+                ]);
+
+                let pn = 
ereport.part_number.as_deref().unwrap_or(""); + let sn = + ereport.serial_number.as_deref().unwrap_or(""); + writeln!(f, "{BULLET:>indent$}ereport {}", ereport.id())?; + writeln!( + f, + "{:>indent$}{CLASS:") + )?; + writeln!( + f, + "{:>indent$}{REPORTED_BY:11}:{sn:<11} ({})", + "", ereport.reporter + )?; + writeln!( + f, + "{:>indent$}{ADDED_IN:indent$}{ASSIGNMENT_ID:indent$}{COMMENT:::MIN_UTC; + + let ereport1 = CaseEreport { + id: CaseEreportUuid::from_str( + "89f650fd-c67c-4dcc-9acc-0ce02d43a62b", + ) + .unwrap(), + ereport: Arc::new(Ereport { + data: crate::fm::ereport::EreportData { + id: EreportId { restart_id, ena: Ena::from(2u64) }, + time_collected, + collector_id, + serial_number: Some("BRM6900420".to_string()), + part_number: Some("913-0000037".to_string()), + class: Some("hw.pwr.remove.psu".to_string()), + report: serde_json::json!({}), + }, + reporter: crate::fm::ereport::Reporter::Sp { + sp_type: SpType::Power, + slot: 0, + }, + }), + assigned_sitrep_id: created_sitrep_id, + comment: "PSU removed".to_string(), + }; + ereports.insert_unique(ereport1).unwrap(); + + let ereport2 = CaseEreport { + id: CaseEreportUuid::from_str( + "7b923ffc-f5fc-4001-acf4-1224dad7d3ef", + ) + .unwrap(), + ereport: Arc::new(Ereport { + data: crate::fm::ereport::EreportData { + id: EreportId { restart_id, ena: Ena::from(3u64) }, + time_collected, + collector_id, + serial_number: Some("BRM6900420".to_string()), + part_number: Some("913-0000037".to_string()), + class: Some("hw.pwr.insert.psu".to_string()), + report: serde_json::json!({}), + }, + reporter: crate::fm::ereport::Reporter::Sp { + sp_type: SpType::Power, + slot: 0, + }, + }), + assigned_sitrep_id: closed_sitrep_id, + comment: "PSU inserted, closing this case".to_string(), + }; + ereports.insert_unique(ereport2).unwrap(); + + // Create the case + let case = Case { + id: case_id, + created_sitrep_id, + closed_sitrep_id: Some(closed_sitrep_id), + de: DiagnosisEngineKind::PowerShelf, + ereports, + comment: "Power 
shelf rectifier added and removed here :-)" + .to_string(), + }; + + eprintln!("example case display:"); + eprintln!("=====================\n"); + eprintln!("{case}"); + + eprintln!("example case display (indented by 4):"); + eprintln!("=====================================\n"); + eprintln!("{}", case.display_indented(4, Some(closed_sitrep_id))); + } +} diff --git a/nexus/types/src/fm/ereport.rs b/nexus/types/src/fm/ereport.rs index dd840550c3d..1ad263af15f 100644 --- a/nexus/types/src/fm/ereport.rs +++ b/nexus/types/src/fm/ereport.rs @@ -23,6 +23,28 @@ pub struct Ereport { pub reporter: Reporter, } +impl Ereport { + pub fn id(&self) -> &EreportId { + &self.data.id + } +} + +impl core::ops::Deref for Ereport { + type Target = EreportData; + fn deref(&self) -> &Self::Target { + &self.data + } +} + +impl iddqd::IdOrdItem for Ereport { + type Key<'a> = &'a EreportId; + fn key(&self) -> Self::Key<'_> { + self.id() + } + + iddqd::id_upcast!(); +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct EreportData { #[serde(flatten)] @@ -123,7 +145,16 @@ impl EreportData { /// Describes the source of an ereport. 
#[derive( - Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize, + Copy, + Clone, + Debug, + Eq, + PartialEq, + Ord, + PartialOrd, + Serialize, + Deserialize, + Hash, )] #[serde(tag = "reporter")] pub enum Reporter { @@ -133,18 +164,17 @@ pub enum Reporter { impl fmt::Display for Reporter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Display format based on: + // https://rfd.shared.oxide.computer/rfd/200#_labeling match self { - Self::Sp { sp_type: SpType::Sled, slot } => { - write!(f, "Sled (SP) {slot:02}") - } - Self::Sp { sp_type: SpType::Switch, slot } => { - write!(f, "Switch {slot}") - } - Self::Sp { sp_type: SpType::Power, slot } => { - write!(f, "PSC {slot}") + Self::Sp { sp_type: sp_type @ SpType::Sled, slot } => { + write!(f, "{sp_type} {slot:<2} (SP)") } Self::HostOs { sled } => { - write!(f, "Sled (OS) {sled:?}") + write!(f, "{} {sled:?} (OS)", SpType::Sled) + } + Self::Sp { sp_type, slot } => { + write!(f, "{sp_type} {slot}") } } } diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index 42264d0411b..daaf00eeb03 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -15,6 +15,7 @@ use omicron_uuid_kinds::AlertReceiverUuid; use omicron_uuid_kinds::AlertUuid; use omicron_uuid_kinds::BlueprintUuid; use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::SitrepUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::SupportBundleUuid; use omicron_uuid_kinds::TufRepoUuid; @@ -886,6 +887,25 @@ pub struct SitrepGcStatus { pub errors: Vec, } +/// The status of a `fm_sitrep_execution` background task activation. 
+#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
+pub enum SitrepExecutionStatus {
+    NoSitrep,
+    Executed { sitrep_id: SitrepUuid, alerts: SitrepAlertRequestStatus },
+}
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
+pub struct SitrepAlertRequestStatus {
+    /// The total number of alerts requested by the current sitrep.
+    pub total_alerts_requested: usize,
+    /// The total number of alerts which were *first* requested in the current sitrep.
+    pub current_sitrep_alerts_requested: usize,
+    /// The number of alerts created by this activation.
+    pub alerts_created: usize,
+    /// Errors that occurred during this activation.
+    pub errors: Vec<String>,
+}
+
 #[derive(Debug, Deserialize, Serialize)]
 pub struct ProbeError {
     /// ID of the sled we failed to send a probe to.
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 69d43b7e3be..879fe5b3309 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -6842,6 +6842,74 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_sitrep_version_by_id
 ON omicron.public.fm_sitrep_history (sitrep_id);

+
+CREATE TYPE IF NOT EXISTS omicron.public.diagnosis_engine AS ENUM (
+    'power_shelf'
+);
+
+CREATE TABLE IF NOT EXISTS omicron.public.fm_case (
+    -- Case UUID
+    id UUID NOT NULL,
+    -- UUID of the sitrep in which the case had this state.
+    sitrep_id UUID NOT NULL,
+
+    de omicron.public.diagnosis_engine NOT NULL,
+
+    -- UUID of the sitrep in which the case was created.
+    created_sitrep_id UUID NOT NULL,
+
+    -- UUID of the sitrep in which the case was closed. If this is not NULL,
+    -- then the case has been closed.
+    closed_sitrep_id UUID,
+
+    comment TEXT NOT NULL,
+
+    PRIMARY KEY (sitrep_id, id)
+);
+
+CREATE INDEX IF NOT EXISTS
+    lookup_fm_cases_for_sitrep
+ON omicron.public.fm_case (sitrep_id);
+
+CREATE TABLE IF NOT EXISTS omicron.public.fm_ereport_in_case (
+    -- ID of this association. When an ereport is assigned to a case, that
+    -- association is assigned a UUID. 
These are used primarily to aid in + -- paginating queries to this table, which would otherwise require a + -- three-column pagination utility in order to paginate by (case_id, + -- restart_id, ena). + id UUID NOT NULL, + -- The ereport's identity. + restart_id UUID NOT NULL, + ena INT8 NOT NULL, + + -- UUID of the case the ereport is assigned to. + case_id UUID NOT NULL, + + -- UUID of the sitrep in which this assignment exists. + sitrep_id UUID NOT NULL, + -- UUID of the sitrep in which the ereport was initially assigned to this + -- case. + assigned_sitrep_id UUID NOT NULL, + + comment TEXT NOT NULL, + + PRIMARY KEY (sitrep_id, id) +); + +-- The same ereport may not be assigned to the same case multiple times. +CREATE UNIQUE INDEX IF NOT EXISTS + lookup_ereport_assignments_by_ereport +ON omicron.public.fm_ereport_in_case ( + sitrep_id, + case_id, + restart_id, + ena +); + +CREATE INDEX IF NOT EXISTS + lookup_ereports_assigned_to_fm_case +ON omicron.public.fm_ereport_in_case (sitrep_id, case_id); + /* * List of datasets available to be sliced up and passed to VMMs for instance * local storage. @@ -7390,7 +7458,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '212.0.0', NULL) + (TRUE, NOW(), NOW(), '213.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/fm-cases/up1.sql b/schema/crdb/fm-cases/up1.sql new file mode 100644 index 00000000000..80d6fa3a7aa --- /dev/null +++ b/schema/crdb/fm-cases/up1.sql @@ -0,0 +1,3 @@ +CREATE TYPE IF NOT EXISTS omicron.public.diagnosis_engine AS ENUM ( + 'power_shelf' +); diff --git a/schema/crdb/fm-cases/up2.sql b/schema/crdb/fm-cases/up2.sql new file mode 100644 index 00000000000..9dd4a3b573d --- /dev/null +++ b/schema/crdb/fm-cases/up2.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS omicron.public.fm_case ( + -- Case UUID + id UUID NOT NULL, + -- UUID of the sitrep in which the case had this state. 
+ sitrep_id UUID NOT NULL, + + de omicron.public.diagnosis_engine NOT NULL, + + -- UUID of the sitrep in which the case was created. + created_sitrep_id UUID NOT NULL, + + -- UUID of the sitrep in which the case was closed. If this is not NULL, + -- then the case has been closed. + closed_sitrep_id UUID, + + comment TEXT NOT NULL, + + PRIMARY KEY (sitrep_id, id) +); diff --git a/schema/crdb/fm-cases/up3.sql b/schema/crdb/fm-cases/up3.sql new file mode 100644 index 00000000000..841081167ce --- /dev/null +++ b/schema/crdb/fm-cases/up3.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT EXISTS + lookup_fm_cases_for_sitrep +ON omicron.public.fm_case (sitrep_id); diff --git a/schema/crdb/fm-cases/up4.sql b/schema/crdb/fm-cases/up4.sql new file mode 100644 index 00000000000..d1da351a01e --- /dev/null +++ b/schema/crdb/fm-cases/up4.sql @@ -0,0 +1,24 @@ +CREATE TABLE IF NOT EXISTS omicron.public.fm_ereport_in_case ( + -- ID of this association. When an ereport is assigned to a case, that + -- association is assigned a UUID. These are used primarily to aid in + -- paginating queries to this table, which would otherwise require a + -- three-column pagination utility in order to paginate by (case_id, + -- restart_id, ena). + id UUID NOT NULL, + -- The ereport's identity. + restart_id UUID NOT NULL, + ena INT8 NOT NULL, + + -- UUID of the case the ereport is assigned to. + case_id UUID NOT NULL, + + -- UUID of the sitrep in which this assignment exists. + sitrep_id UUID NOT NULL, + -- UUID of the sitrep in which the ereport was initially assigned to this + -- case. + assigned_sitrep_id UUID NOT NULL, + + comment TEXT NOT NULL, + + PRIMARY KEY (sitrep_id, id) +); diff --git a/schema/crdb/fm-cases/up5.sql b/schema/crdb/fm-cases/up5.sql new file mode 100644 index 00000000000..4a3bd79af04 --- /dev/null +++ b/schema/crdb/fm-cases/up5.sql @@ -0,0 +1,9 @@ +-- The same ereport may not be assigned to the same case multiple times. 
+CREATE UNIQUE INDEX IF NOT EXISTS + lookup_ereport_assignments_by_ereport +ON omicron.public.fm_ereport_in_case ( + sitrep_id, + case_id, + restart_id, + ena +); diff --git a/schema/crdb/fm-cases/up6.sql b/schema/crdb/fm-cases/up6.sql new file mode 100644 index 00000000000..d8f9b0f9be3 --- /dev/null +++ b/schema/crdb/fm-cases/up6.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT EXISTS + lookup_ereports_assigned_to_fm_case +ON omicron.public.fm_ereport_in_case (sitrep_id, case_id); diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index d0bc04ffe8e..1b190e0f0ce 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -45,6 +45,8 @@ impl_typed_uuid_kinds! { AntiAffinityGroup = {}, Blueprint = {}, BuiltInUser = {}, + Case = {}, + CaseEreport = {}, Collection = {}, ConsoleSession = {}, Dataset = {},