diff --git a/Cargo.lock b/Cargo.lock index 5b1e25ac539..e622f624f1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6738,6 +6738,7 @@ dependencies = [ "test-strategy", "thiserror 2.0.17", "tokio", + "trust-quorum-protocol", "tufaceous-artifact", "uuid", "vergen-gitcl", @@ -6830,6 +6831,7 @@ dependencies = [ "term 0.7.0", "thiserror 2.0.17", "tokio", + "trust-quorum-protocol", "tufaceous-artifact", "url", "usdt 0.5.0", @@ -7513,6 +7515,7 @@ dependencies = [ "thiserror 2.0.17", "tokio", "tough", + "trust-quorum-protocol", "tufaceous-artifact", "unicode-width 0.1.14", "update-engine", diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index ffce1c15e9a..e2baa6b1244 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -41,6 +41,7 @@ slog-error-chain.workspace = true steno.workspace = true strum.workspace = true thiserror.workspace = true +trust-quorum-protocol.workspace = true tokio.workspace = true uuid.workspace = true diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index b2e1c3ed10d..201560d93b0 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -488,7 +488,7 @@ impl<'a> From<&'a Collection> for InvCollection { } /// See [`nexus_types::inventory::BaseboardId`]. 
-#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[derive(Queryable, Insertable, Clone, Debug, Selectable, PartialEq, Eq)] #[diesel(table_name = hw_baseboard_id)] pub struct HwBaseboardId { pub id: Uuid, @@ -496,6 +496,15 @@ pub struct HwBaseboardId { pub serial_number: String, } +impl From for trust_quorum_protocol::BaseboardId { + fn from(value: HwBaseboardId) -> Self { + Self { + part_number: value.part_number, + serial_number: value.serial_number, + } + } +} + impl From for HwBaseboardId { fn from(c: BaseboardId) -> Self { HwBaseboardId { diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index a0497f5d932..0dcf72ac5fa 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -85,6 +85,7 @@ mod silo_auth_settings; mod switch_interface; mod switch_port; mod target_release; +mod trust_quorum; mod v2p_mapping; mod vmm_state; mod webhook_delivery; @@ -258,6 +259,7 @@ pub use switch::*; pub use switch_interface::*; pub use switch_port::*; pub use target_release::*; +pub use trust_quorum::*; pub use tuf_repo::*; pub use typed_uuid::DbTypedUuid; pub use typed_uuid::to_db_typed_uuid; diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 013dba0c8d2..2f25bea2817 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(211, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(212, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(212, "add-trust-quorum"), KnownVersion::new(211, "blueprint-sled-config-subnet"), KnownVersion::new(210, "one-big-ereport-table"), KnownVersion::new(209, "multicast-group-support"), diff --git a/nexus/db-model/src/trust_quorum.rs b/nexus/db-model/src/trust_quorum.rs new file mode 100644 index 00000000000..91b1c557483 --- /dev/null +++ b/nexus/db-model/src/trust_quorum.rs @@ -0,0 +1,112 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Database representations for trust quorum types + +use super::impl_enum_type; +use crate::SqlU8; +use crate::typed_uuid::DbTypedUuid; +use nexus_db_schema::schema::{ + lrtq_member, trust_quorum_configuration, trust_quorum_member, +}; +use nexus_types::trust_quorum::{ + TrustQuorumConfigState, TrustQuorumMemberState, +}; +use omicron_uuid_kinds::RackKind; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +impl_enum_type!( + TrustQuorumConfigurationStateEnum: + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + pub enum DbTrustQuorumConfigurationState; + + // Enum values + Preparing => b"preparing" + Committed => b"committed" + Aborted => b"aborted" +); + +impl From for TrustQuorumConfigState { + fn from(value: DbTrustQuorumConfigurationState) -> Self { + match value { + DbTrustQuorumConfigurationState::Preparing => Self::Preparing, + DbTrustQuorumConfigurationState::Committed => Self::Committed, + DbTrustQuorumConfigurationState::Aborted => Self::Aborted, + } + } +} + +impl From for DbTrustQuorumConfigurationState { + fn from(value: TrustQuorumConfigState) -> Self { + match value { + TrustQuorumConfigState::Preparing => Self::Preparing, + TrustQuorumConfigState::Committed => Self::Committed, + TrustQuorumConfigState::Aborted => 
Self::Aborted, + } + } +} + +impl_enum_type!( + TrustQuorumMemberStateEnum: + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + pub enum DbTrustQuorumMemberState; + + // Enum values + Unacked => b"unacked" + Prepared => b"prepared" + Committed => b"committed" +); + +impl From for TrustQuorumMemberState { + fn from(value: DbTrustQuorumMemberState) -> Self { + match value { + DbTrustQuorumMemberState::Unacked => Self::Unacked, + DbTrustQuorumMemberState::Prepared => Self::Prepared, + DbTrustQuorumMemberState::Committed => Self::Committed, + } + } +} + +impl From for DbTrustQuorumMemberState { + fn from(value: TrustQuorumMemberState) -> Self { + match value { + TrustQuorumMemberState::Unacked => Self::Unacked, + TrustQuorumMemberState::Prepared => Self::Prepared, + TrustQuorumMemberState::Committed => Self::Committed, + } + } +} + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = lrtq_member)] +pub struct LrtqMember { + pub rack_id: DbTypedUuid, + pub hw_baseboard_id: Uuid, +} + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = trust_quorum_configuration)] +pub struct TrustQuorumConfiguration { + pub rack_id: DbTypedUuid, + pub epoch: i64, + pub state: DbTrustQuorumConfigurationState, + pub threshold: SqlU8, + pub commit_crash_tolerance: SqlU8, + pub coordinator: Uuid, + pub encrypted_rack_secrets_salt: Option, + pub encrypted_rack_secrets: Option>, +} + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = trust_quorum_member)] +pub struct TrustQuorumMember { + pub rack_id: DbTypedUuid, + pub epoch: i64, + pub hw_baseboard_id: Uuid, + pub state: DbTrustQuorumMemberState, + pub share_digest: Option, +} diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 314f9614a91..2483c331a04 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -54,6 +54,7 @@ swrite.workspace = true 
thiserror.workspace = true tokio = { workspace = true, features = ["full"] } tufaceous-artifact.workspace = true +trust-quorum-protocol.workspace = true url.workspace = true usdt.workspace = true uuid.workspace = true diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 354c58a0300..698dcd14405 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -116,6 +116,7 @@ mod switch_port; mod target_release; #[cfg(test)] pub(crate) mod test_utils; +mod trust_quorum; pub mod update; mod user_data_export; mod utilization; diff --git a/nexus/db-queries/src/db/datastore/trust_quorum.rs b/nexus/db-queries/src/db/datastore/trust_quorum.rs new file mode 100644 index 00000000000..e8c7fa6cab1 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/trust_quorum.rs @@ -0,0 +1,792 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Trust quorum related queries + +use super::DataStore; +use crate::authz; +use crate::context::OpContext; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::*; +use nexus_db_errors::ErrorHandler; +use nexus_db_errors::OptionalError; +use nexus_db_errors::TransactionError; +use nexus_db_errors::public_error_from_diesel; +use nexus_db_errors::public_error_from_diesel_create; +use nexus_db_lookup::DbConnection; +use nexus_db_model::DbTrustQuorumConfigurationState; +use nexus_db_model::DbTrustQuorumMemberState; +use nexus_db_model::DbTypedUuid; +use nexus_db_model::HwBaseboardId; +use nexus_db_model::TrustQuorumConfiguration as DbTrustQuorumConfiguration; +use nexus_db_model::TrustQuorumMember as DbTrustQuorumMember; +use nexus_types::trust_quorum::{ + TrustQuorumConfig, TrustQuorumConfigState, TrustQuorumMemberData, + TrustQuorumMemberState, +}; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::OptionalLookupResult; +use omicron_common::bail_unless; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::RackKind; +use omicron_uuid_kinds::RackUuid; +use std::collections::{BTreeMap, BTreeSet}; +use trust_quorum_protocol::{ + BaseboardId, EncryptedRackSecrets, Epoch, Salt, Sha3_256Digest, Threshold, +}; + +macro_rules! 
bail_txn { + ($err:ident, $($arg:tt),*) => { + return Err($err.bail( + omicron_common::api::external::Error::internal_error(&format!( + $($arg),* + )) + .into() + )); + } +} + +fn i64_to_epoch(val: i64) -> Result { + let Ok(epoch) = val.try_into() else { + return Err(Error::internal_error(&format!( + "Failed to convert i64 from database: {val} \ + into trust quroum epoch", + ))); + }; + Ok(Epoch(epoch)) +} + +fn epoch_to_i64(epoch: Epoch) -> Result { + epoch.0.try_into().map_err(|_| { + Error::internal_error(&format!( + "Failed to convert trust quorum epoch to i64 in attempt to insert \ + into database: {epoch}" + )) + }) +} + +impl DataStore { + /// Return all `HwBaseboardId`s for a given rack that has run LRTQ + /// + /// No need for pagination, as there at most 32 member sleds per rack + pub async fn lrtq_members( + &self, + opctx: &OpContext, + rack_id: RackUuid, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + use nexus_db_schema::schema::hw_baseboard_id::dsl as hw_baseboard_id_dsl; + use nexus_db_schema::schema::lrtq_member::dsl as lrtq_member_dsl; + + lrtq_member_dsl::lrtq_member + .filter(lrtq_member_dsl::rack_id.eq(rack_id.into_untyped_uuid())) + .inner_join(hw_baseboard_id_dsl::hw_baseboard_id.on( + hw_baseboard_id_dsl::id.eq(lrtq_member_dsl::hw_baseboard_id), + )) + .select(HwBaseboardId::as_select()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + pub async fn tq_get_latest_config( + &self, + opctx: &OpContext, + rack_id: RackUuid, + ) -> OptionalLookupResult { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + // First, retrieve our configuration if there is one. + let Some(latest) = + self.tq_get_latest_config_conn(opctx, conn, rack_id).await? 
+ else { + return Ok(None); + }; + + // Then get any members associated with the configuration + let members = self + .tq_get_members_conn(opctx, conn, rack_id, latest.epoch) + .await?; + + let mut tq_members: BTreeMap = + BTreeMap::new(); + let mut coordinator: Option = None; + for (member, hw_baseboard_id) in members { + let digest = if let Some(digest_str) = member.share_digest { + let mut data = [0u8; 32]; + hex::decode_to_slice(&digest_str, &mut data).map_err(|e| { + Error::internal_error(&format!( + "Failed to decode share digest for trust quorum member \ + {}:{} : {e}", + hw_baseboard_id.part_number, + hw_baseboard_id.serial_number + )) + })?; + Some(Sha3_256Digest(data)) + } else { + None + }; + + // The coordinator is always a member of the group + // We pull out its `BaseboardId` here. + if latest.coordinator == hw_baseboard_id.id { + coordinator = Some(hw_baseboard_id.clone().into()); + } + tq_members.insert( + hw_baseboard_id.into(), + TrustQuorumMemberData { state: member.state.into(), digest }, + ); + } + + let salt = if let Some(salt_str) = latest.encrypted_rack_secrets_salt { + let mut data = [0u8; 32]; + hex::decode_to_slice(&salt_str, &mut data).map_err(|e| { + Error::internal_error(&format!( + "Failed to decode salt for trust quorum config: \ + rack_id: {}, epoch: {}: {e}", + latest.rack_id, latest.epoch + )) + })?; + Some(Salt(data)) + } else { + None + }; + + let encrypted_rack_secrets = if salt.is_some() { + let Some(secrets) = latest.encrypted_rack_secrets else { + // This should never happend due to constraint checks + return Err(Error::internal_error(&format!( + "Salt exists, but secrets do not for trust quorum config: \ + rack_id: {}, epoch: {}", + latest.rack_id, latest.epoch + ))); + }; + Some(EncryptedRackSecrets::new( + salt.unwrap(), + secrets.into_boxed_slice(), + )) + } else { + None + }; + + let Some(coordinator) = coordinator else { + return Err(Error::internal_error(&format!( + "Failed to find coordinator for hw_baseboard_id: {} 
\ + in trust quorum config.", + latest.coordinator + ))); + }; + + Ok(Some(TrustQuorumConfig { + rack_id: latest.rack_id.into(), + epoch: i64_to_epoch(latest.epoch)?, + state: latest.state.into(), + threshold: Threshold(latest.threshold.into()), + commit_crash_tolerance: latest.commit_crash_tolerance.into(), + coordinator, + encrypted_rack_secrets, + members: tq_members, + })) + } + + /// Insert a new trust quorum configuration, but only if it is equivalent + /// to the highest epoch of the last configuration + 1. + pub async fn tq_insert_latest_config( + &self, + opctx: &OpContext, + config: TrustQuorumConfig, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + let err = OptionalError::new(); + + self.transaction_retry_wrapper("tq_insert_latest_config") + .transaction(&conn, |c| { + let err = err.clone(); + let config = config.clone(); + + async move { + let current = self + .tq_get_latest_epoch_in_txn(opctx, &c, config.rack_id) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + let is_insertable = if let Some(epoch) = current.clone() { + // Only insert if what is in the DB is immediately prior to + // this configuration. + Some(epoch) == config.epoch.previous() + } else { + // Unconditional update is fine here, since a config doesn't + // exist TODO: Should we ensure that epoch == 1 || epoch == + // 2 ? 
+ true + }; + + if !is_insertable { + return Err(err.bail(TransactionError::CustomError( + Error::conflict(format!( + "expected current TQ epoch for rack_id \ + {} to be {:?}, found {:?}", + config.rack_id, + config.epoch.previous(), + current + )), + ))); + } + + self.insert_tq_config_in_txn(opctx, conn, config) + .await + .map_err(|txn_error| txn_error.into_diesel(&err)) + } + }) + .await + .map_err(|e| match err.take() { + Some(err) => err.into(), + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } + + /// If this configuration is in the `Preparing` state, then update any members + /// to acknowledge the prepare. + /// + /// Also, update any digests or encrypted rack secrets if necessary. + pub async fn tq_update_prepare_status( + &self, + opctx: &OpContext, + rack_id: RackUuid, + epoch: Epoch, + config: trust_quorum_protocol::Configuration, + acked_prepares: BTreeSet, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + let epoch = epoch_to_i64(epoch)?; + + let err = OptionalError::new(); + + self.transaction_retry_wrapper("tq_update_prepare_status") + .transaction(&conn, |c| { + let err = err.clone(); + let config = config.clone(); + let acked_prepares = acked_prepares.clone(); + async move { + // First, retrieve our configuration if there is one. + let latest = self + .tq_get_latest_config_conn(opctx, &c, rack_id) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + let Some(db_config) = latest else { + bail_txn!( + err, + "No trust quorum config for rack_id {} at epoch {}", + rack_id, + epoch + ); + }; + + // If we aren't preparing, then ignore this call. Multiple + // Nexuses race to completion and we don't want to worry + // about overwriting commits with prepares in the `state` + // field of each member. 
+ if db_config.state + != DbTrustQuorumConfigurationState::Preparing + { + info!( + opctx.log, + "Ignoring stale update of trust quorum prepare \ + status"; + "state" => ?db_config.state + ); + return Ok(()); + } + + if db_config.epoch != epoch { + let actual = db_config.epoch; + bail_txn!( + err, + "Cannot update trust quorum config. \ + Latest epoch does not match. Expected {}, Got {}", + epoch, + actual + ); + } + + // Then get any members associated with the configuration + let db_members = self + .tq_get_members_conn( + opctx, + &c, + rack_id, + db_config.epoch, + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + // We only update the configuration in the database if: + // 1. This is the first time we have seen encrypted rack secrets + // 2. We are transitioning from preparing to committed state. + let should_write_secrets = + db_config.encrypted_rack_secrets_salt.is_none() + && config.encrypted_rack_secrets.is_some(); + + let mut total_acks = 0; + for (mut member, hw_id) in db_members { + let baseboard_id: BaseboardId = hw_id.into(); + + // Set the share digest for the member if we just learned it + if member.share_digest.is_none() { + let Some(digest) = + config.members.get(&baseboard_id) + else { + bail_txn!( + err, + "Cannot update share digest for {}. Not a \ + member of the trust quorum configuration.", + baseboard_id + ); + }; + member.share_digest = Some(hex::encode(digest.0)); + } + + // Set the state of this member + if acked_prepares.contains(&baseboard_id) + && member.state == DbTrustQuorumMemberState::Unacked + { + member.state = DbTrustQuorumMemberState::Prepared + // TODO: Let's update this row in the DB + } + + if member.state == DbTrustQuorumMemberState::Prepared { + total_acks += 1; + } + } + + // Do we have enough acks to commit? 
+ let should_commit = total_acks + >= (db_config.threshold.0 + + db_config.commit_crash_tolerance.0) + as usize; + + match (should_write_secrets, should_commit) { + (true, true) => { + + // TODO: write secrets and commit + } + (true, false) => { + self.update_tq_encrypted_rack_secrets_in_txn( + opctx, + conn, + db_config.rack_id, + db_config.epoch, + config.encrypted_rack_secrets.unwrap(), + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + } + (false, true) => { + // TODO: commit + } + (false, false) => { + // Nothing to do + } + } + + Ok(()) + } + }) + .await + .map_err(|e| match err.take() { + Some(err) => err.into(), + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } + + pub async fn tq_update_commit_status( + &self, + opctx: &OpContext, + rack_id: RackUuid, + epoch: Epoch, + acked_commits: BTreeSet, + ) { + todo!() + } + + // Unconditional insert that should only run inside a transaction + async fn insert_tq_config_in_txn( + &self, + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + config: TrustQuorumConfig, + ) -> Result<(), TransactionError> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + let members = self + .lookup_hw_baseboard_ids_conn( + opctx, + conn, + config.members.keys().cloned(), + ) + .await?; + + let (salt, secrets) = + config.encrypted_rack_secrets.map_or((None, None), |s| { + (Some(hex::encode(s.salt.0)), Some(s.data.into())) + }); + + // Max of 32 members to search. We could use binary search if we sorted + // the output with an `order_by` in the DB query, or speed up search + // if converted to a map. Neither seems necessary for such a rare + // operation. 
+ let coordinator_id = members.iter().find(|m| { + m.part_number == config.coordinator.part_number + && m.serial_number == config.coordinator.serial_number + }); + bail_unless!( + coordinator_id.is_some(), + "Coordinator: {} is not a member of the trust quorum", + config.coordinator + ); + let coordinator_id = coordinator_id.unwrap().id; + + let epoch = epoch_to_i64(config.epoch) + .map_err(|e| TransactionError::from(e))?; + + // Insert the configuration + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + diesel::insert_into(dsl::trust_quorum_configuration) + .values(DbTrustQuorumConfiguration { + rack_id: config.rack_id.into(), + epoch, + state: config.state.into(), + threshold: config.threshold.0.into(), + commit_crash_tolerance: config.commit_crash_tolerance.into(), + coordinator: coordinator_id, + encrypted_rack_secrets_salt: salt, + encrypted_rack_secrets: secrets, + }) + .execute_async(conn) + .await?; + + // Insert the members + let members: Vec<_> = members + .into_iter() + .map(|m| DbTrustQuorumMember { + rack_id: config.rack_id.into(), + epoch, + hw_baseboard_id: m.id, + state: nexus_db_model::DbTrustQuorumMemberState::Unacked, + share_digest: None, + }) + .collect(); + + use nexus_db_schema::schema::trust_quorum_member::dsl as members_dsl; + diesel::insert_into(members_dsl::trust_quorum_member) + .values(members) + .execute_async(conn) + .await?; + + Ok(()) + } + + async fn update_tq_encrypted_rack_secrets_in_txn( + &self, + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: DbTypedUuid, + epoch: i64, + encrypted_rack_secrets: EncryptedRackSecrets, + ) -> Result<(), TransactionError> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let salt = Some(hex::encode(encrypted_rack_secrets.salt.0)); + let secrets: Option> = Some(encrypted_rack_secrets.data.into()); + + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + + diesel::update(dsl::trust_quorum_configuration) + 
.filter(dsl::rack_id.eq(rack_id)) + .filter(dsl::epoch.eq(epoch)) + .filter(dsl::encrypted_rack_secrets_salt.is_null()) + .filter(dsl::encrypted_rack_secrets.is_null()) + .set(( + dsl::encrypted_rack_secrets_salt.eq(salt), + dsl::encrypted_rack_secrets.eq(secrets), + )) + .execute_async(conn) + .await?; + + Ok(()) + } + + async fn lookup_hw_baseboard_ids_conn( + &self, + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + members: impl Iterator, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use nexus_db_schema::schema::hw_baseboard_id::dsl; + + let (parts, serials): (Vec<_>, Vec<_>) = members + .into_iter() + .map(|m| (m.part_number, m.serial_number)) + .collect(); + + dsl::hw_baseboard_id + .filter(dsl::part_number.eq_any(parts)) + .filter(dsl::serial_number.eq_any(serials)) + .select(HwBaseboardId::as_select()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + async fn tq_get_latest_epoch_in_txn( + &self, + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: RackUuid, + ) -> Result, TransactionError> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + let Some(latest_epoch) = dsl::trust_quorum_configuration + .filter(dsl::rack_id.eq(rack_id.into_untyped_uuid())) + .order_by(dsl::epoch.desc()) + .select(dsl::epoch) + .first_async::(conn) + .await + .optional()? 
+ else { + return Ok(None); + }; + let latest_epoch = i64_to_epoch(latest_epoch)?; + Ok(Some(latest_epoch)) + } + + async fn tq_get_latest_config_conn( + &self, + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: RackUuid, + ) -> Result, TransactionError> + { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + + let latest = dsl::trust_quorum_configuration + .filter(dsl::rack_id.eq(rack_id.into_untyped_uuid())) + .order_by(dsl::epoch.desc()) + .first_async::(conn) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(latest) + } + + async fn tq_get_members_conn( + &self, + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: RackUuid, + epoch: i64, + ) -> Result< + Vec<(DbTrustQuorumMember, HwBaseboardId)>, + TransactionError, + > { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use nexus_db_schema::schema::hw_baseboard_id::dsl as hw_baseboard_id_dsl; + use nexus_db_schema::schema::trust_quorum_member::dsl; + + let members = dsl::trust_quorum_member + .filter(dsl::rack_id.eq(rack_id.into_untyped_uuid())) + .filter(dsl::epoch.eq(epoch)) + .inner_join( + hw_baseboard_id_dsl::hw_baseboard_id + .on(hw_baseboard_id_dsl::id.eq(dsl::hw_baseboard_id)), + ) + .select(( + DbTrustQuorumMember::as_select(), + HwBaseboardId::as_select(), + )) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(members) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::pub_test_utils::TestDatabase; + use nexus_db_model::{HwBaseboardId, LrtqMember}; + use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::RackUuid; + use uuid::Uuid; + + async fn insert_hw_baseboard_ids(db: &TestDatabase) -> Vec { + let (_, datastore) = (db.opctx(), db.datastore()); + let conn = datastore.pool_connection_for_tests().await.unwrap(); + use 
nexus_db_schema::schema::hw_baseboard_id::dsl; + let hw_baseboard_ids: Vec<_> = (0..10) + .map(|i| HwBaseboardId { + id: Uuid::new_v4(), + part_number: "test-part".to_string(), + serial_number: i.to_string(), + }) + .collect(); + + diesel::insert_into(dsl::hw_baseboard_id) + .values(hw_baseboard_ids.clone()) + .execute_async(&*conn) + .await + .unwrap(); + + hw_baseboard_ids + } + + async fn insert_lrtq_members( + db: &TestDatabase, + rack_id1: RackUuid, + rack_id2: RackUuid, + hw_ids: Vec, + ) { + let (_, datastore) = (db.opctx(), db.datastore()); + let conn = datastore.pool_connection_for_tests().await.unwrap(); + use nexus_db_schema::schema::lrtq_member::dsl; + for (i, hw_baseboard_id) in hw_ids.into_iter().enumerate() { + let rack_id = if i < 5 { rack_id1.into() } else { rack_id2.into() }; + diesel::insert_into(dsl::lrtq_member) + .values(LrtqMember { + rack_id, + hw_baseboard_id: hw_baseboard_id.id, + }) + .execute_async(&*conn) + .await + .unwrap(); + } + } + + #[tokio::test] + async fn test_lrtq_members() { + let logctx = test_setup_log("test_lrtq_members"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let rack_id1 = RackUuid::new_v4(); + let rack_id2 = RackUuid::new_v4(); + + // Listing lrtq members should return an empty vec + assert!( + datastore.lrtq_members(opctx, rack_id1).await.unwrap().is_empty() + ); + + // Insert some data + let hw_ids = insert_hw_baseboard_ids(&db).await; + insert_lrtq_members(&db, rack_id1, rack_id2, hw_ids.clone()).await; + + let hw_baseboard_ids1 = + datastore.lrtq_members(opctx, rack_id1).await.unwrap(); + println!("{:?}", hw_baseboard_ids1); + assert_eq!(hw_baseboard_ids1.len(), 5); + let hw_baseboard_ids2 = + datastore.lrtq_members(opctx, rack_id2).await.unwrap(); + assert_eq!(hw_baseboard_ids2.len(), 5); + assert_ne!(hw_baseboard_ids1, hw_baseboard_ids2); + } + + #[tokio::test] + async fn test_insert_latest_tq_round_trip() { + let logctx = 
test_setup_log("test_insert_latest_tq_round_trip"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let hw_ids = insert_hw_baseboard_ids(&db).await; + + let rack_id = RackUuid::new_v4(); + + // Create an initial config + let mut config = TrustQuorumConfig { + rack_id, + epoch: Epoch(1), + state: TrustQuorumConfigState::Preparing, + threshold: Threshold((hw_ids.len() / 2 + 1) as u8), + commit_crash_tolerance: 2, + coordinator: hw_ids.first().unwrap().clone().into(), + encrypted_rack_secrets: None, + members: hw_ids + .clone() + .into_iter() + .map(|m| (m.into(), TrustQuorumMemberData::new())) + .collect(), + }; + + datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + assert_eq!(config, read_config); + + // Inserting the same config again should fail + datastore + .tq_insert_latest_config(opctx, config.clone()) + .await + .expect_err("duplicate insert should fail"); + + // Bumping the epoch and inserting should succeed + config.epoch = Epoch(2); + datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + assert_eq!(config, read_config); + + // We should get an error if we try to insert with a coordinator that is + // not part of the membership. 
+ config.epoch = Epoch(3); + let saved_serial = config.coordinator.serial_number.clone(); + config.coordinator.serial_number = "dummy".to_string(); + datastore + .tq_insert_latest_config(opctx, config.clone()) + .await + .expect_err("insert should fail with invalid coordinator"); + + // Restoring the serial number should succeed + config.coordinator.serial_number = saved_serial; + datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + assert_eq!(config, read_config); + + // Incrementing the epoch by more than one should fail + config.epoch = Epoch(5); + datastore + .tq_insert_latest_config(opctx, config.clone()) + .await + .expect_err( + "insert should fail because previous epoch is incorrect", + ); + } +} diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 2d74db5ab9c..5bff7946244 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -98,6 +98,8 @@ define_enums! { SwitchLinkSpeedEnum => "switch_link_speed", SwitchPortGeometryEnum => "switch_port_geometry", TargetReleaseSourceEnum => "target_release_source", + TrustQuorumConfigurationStateEnum => "trust_quorum_configuration_state", + TrustQuorumMemberStateEnum => "trust_quorum_member_state", UpstairsRepairNotificationTypeEnum => "upstairs_repair_notification_type", UpstairsRepairTypeEnum => "upstairs_repair_type", UserDataExportResourceTypeEnum => "user_data_export_resource_type", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index b2280ee6592..ddebaae82f3 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -2914,3 +2914,39 @@ table! { } allow_tables_to_appear_in_same_query!(fm_sitrep, fm_sitrep_history); + +table! 
{ + lrtq_member (rack_id, hw_baseboard_id) { + rack_id -> Uuid, + hw_baseboard_id -> Uuid, + } +} + +allow_tables_to_appear_in_same_query!(lrtq_member, hw_baseboard_id); +joinable!(lrtq_member -> hw_baseboard_id(hw_baseboard_id)); + +table! { + trust_quorum_configuration (rack_id, epoch) { + rack_id -> Uuid, + epoch -> Int8, + state -> crate::enums::TrustQuorumConfigurationStateEnum, + threshold -> Int2, + commit_crash_tolerance -> Int2, + coordinator -> Uuid, + encrypted_rack_secrets_salt -> Nullable, + encrypted_rack_secrets -> Nullable, + } +} + +table! { + trust_quorum_member (rack_id, epoch, hw_baseboard_id) { + rack_id -> Uuid, + epoch -> Int8, + hw_baseboard_id -> Uuid, + state -> crate::enums::TrustQuorumMemberStateEnum, + share_digest -> Nullable, + } +} + +allow_tables_to_appear_in_same_query!(trust_quorum_member, hw_baseboard_id); +joinable!(trust_quorum_member -> hw_baseboard_id(hw_baseboard_id)); diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 5e9b9382644..440656e2376 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -52,6 +52,13 @@ textwrap.workspace = true thiserror.workspace = true tokio.workspace = true tufaceous-artifact.workspace = true + +# TODO: This currently depends on `sled-agent-types`. But that is only because +# of `BaseboardId`. `BaseboardId` is moving to `sled-agent-types-conversions` as +# part of https://github.com/oxidecomputer/omicron/pull/9488. At that point, we +# can remove the dependency from `trust-quorum-protocol`. Yay. 
+trust-quorum-protocol.workspace = true + newtype-uuid.workspace = true update-engine.workspace = true unicode-width.workspace = true diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs index fd1f2243bfa..c9b203134a9 100644 --- a/nexus/types/src/lib.rs +++ b/nexus/types/src/lib.rs @@ -38,3 +38,4 @@ pub mod internal_api; pub mod inventory; pub mod quiesce; pub mod silo; +pub mod trust_quorum; diff --git a/nexus/types/src/trust_quorum.rs b/nexus/types/src/trust_quorum.rs new file mode 100644 index 00000000000..3748e1e5d0a --- /dev/null +++ b/nexus/types/src/trust_quorum.rs @@ -0,0 +1,56 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types describing the state of trust quorum in Nexus + +use std::collections::BTreeMap; + +use omicron_uuid_kinds::RackUuid; +use trust_quorum_protocol::{ + BaseboardId, EncryptedRackSecrets, Epoch, Sha3_256Digest, Threshold, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TrustQuorumConfigState { + Preparing, + Committed, + Aborted, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TrustQuorumMemberState { + Unacked, + Prepared, + Committed, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TrustQuorumMemberData { + pub state: TrustQuorumMemberState, + + // Only filled in once the coordinator state is succesfully polled by nexus + // after it has created the configuration. 
+    pub digest: Option<Sha3_256Digest>,
+}
+
+impl TrustQuorumMemberData {
+    pub fn new() -> Self {
+        TrustQuorumMemberData {
+            state: TrustQuorumMemberState::Unacked,
+            digest: None,
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct TrustQuorumConfig {
+    pub rack_id: RackUuid,
+    pub epoch: Epoch,
+    pub state: TrustQuorumConfigState,
+    pub threshold: Threshold,
+    pub commit_crash_tolerance: u8,
+    pub coordinator: BaseboardId,
+    pub encrypted_rack_secrets: Option<EncryptedRackSecrets>,
+    pub members: BTreeMap<BaseboardId, TrustQuorumMemberData>,
+}
diff --git a/schema/crdb/add-trust-quorum/up01.sql b/schema/crdb/add-trust-quorum/up01.sql
new file mode 100644
index 00000000000..3f120e7fe16
--- /dev/null
+++ b/schema/crdb/add-trust-quorum/up01.sql
@@ -0,0 +1,14 @@
+-- An LRTQ configuration explicitly placed in the database via a DB migration
+--
+-- LRTQ configurations are always epoch 1, and any subsequent trust quorum
+-- configuration must have epoch > 1.
+CREATE TABLE IF NOT EXISTS omicron.public.lrtq_member (
+    -- Foreign key into the rack table
+    rack_id UUID NOT NULL,
+
+    -- Foreign key into the `hw_baseboard_id` table
+    -- A sled can only be in one rack, hence the UNIQUE constraint.
+    hw_baseboard_id UUID NOT NULL UNIQUE,
+
+    PRIMARY KEY (rack_id, hw_baseboard_id)
+);
diff --git a/schema/crdb/add-trust-quorum/up02.sql b/schema/crdb/add-trust-quorum/up02.sql
new file mode 100644
index 00000000000..a0d179b7edf
--- /dev/null
+++ b/schema/crdb/add-trust-quorum/up02.sql
@@ -0,0 +1,10 @@
+-- Whether a node has prepared or committed yet
+CREATE TYPE IF NOT EXISTS omicron.public.trust_quorum_member_state AS ENUM (
+    -- The node has not acknowledged either a `Prepare` or `Commit` message
+    'unacked',
+    -- The node has acknowledged a `Prepare` message
+    'prepared',
+    -- The node has acknowledged a `Commit` or `PrepareAndCommit` message
+    -- `committed` implies `prepared`
+    'committed'
+);
diff --git a/schema/crdb/add-trust-quorum/up03.sql b/schema/crdb/add-trust-quorum/up03.sql
new file mode 100644
index 00000000000..c4a07ad7e8b
--- /dev/null
+++ b/schema/crdb/add-trust-quorum/up03.sql
@@ -0,0 +1,12 @@
+-- The state of a given trust quorum configuration
+CREATE TYPE IF NOT EXISTS omicron.public.trust_quorum_configuration_state AS ENUM (
+    -- Nexus is waiting for prepare acknowledgments by polling the coordinator
+    -- These may come as part of a reconfiguration or LRTQ upgrade
+    'preparing',
+    -- The configuration has committed to the database, and nexus may still be
+    -- trying to inform nodes about the commit.
+    'committed',
+    -- The configuration has aborted and will not commit. The epoch can be
+    -- skipped.
+ 'aborted' +); diff --git a/schema/crdb/add-trust-quorum/up04.sql b/schema/crdb/add-trust-quorum/up04.sql new file mode 100644 index 00000000000..a75e9ee2213 --- /dev/null +++ b/schema/crdb/add-trust-quorum/up04.sql @@ -0,0 +1,53 @@ +-- Information for tracking trust quorum memberships over time +CREATE TABLE IF NOT EXISTS omicron.public.trust_quorum_configuration ( + -- Foreign key into the rack table + rack_id UUID NOT NULL, + + -- Monotonically increasing version per rack_id + epoch INT8 NOT NULL, + + -- The current state of this configuration + state omicron.public.trust_quorum_configuration_state NOT NULL, + + -- The number of shares needed to compute the rack secret + -- + -- In some documentation we call this the `K` parameter. + threshold INT2 NOT NULL CHECK (threshold > 0), + + -- The number of additional nodes beyond threshold to commit + -- + -- This represents the number of prepared nodes that can be offline after + -- a commit at Nexus and still allow the secret to be reconstructed during + -- rack unlock. If this number is equivalent to the total membership (`N`) + -- minus `threshold` nodes, then all nodes in the membership set for this + -- epoch must ack a prepare for a commit to occur. By varying this value we + -- allow commit to occur even if some nodes haven't prepared, thus providing + -- fault tolerance during the prepare phase and also during unlock. + -- + -- In some documentation we call this the `Z` parameter. + commit_crash_tolerance INT2 NOT NULL CHECK (commit_crash_tolerance >= 0), + + -- Which member is coordinating the prepare phase of the protocol this epoch + -- Foreign key into the `hw_baseboard_id` table + coordinator UUID NOT NULL, + + -- Encrypted rack secrets for prior committed epochs + -- + -- These are only filled in during a reconfiguration and retrieved + -- during the prepare phase of the protocol by Nexus from the coordinator. 
+    --
+    -- Salt is a hex-encoded string
+    encrypted_rack_secrets_salt STRING(64),
+    encrypted_rack_secrets BYTES,
+
+    CONSTRAINT encrypted_rack_secrets_both_or_neither_null CHECK (
+        (encrypted_rack_secrets_salt IS NULL
+            AND encrypted_rack_secrets IS NULL)
+        OR
+        (encrypted_rack_secrets_salt IS NOT NULL
+            AND encrypted_rack_secrets IS NOT NULL)
+    ),
+
+    -- Each rack has its own trust quorum
+    PRIMARY KEY (rack_id, epoch)
+);
diff --git a/schema/crdb/add-trust-quorum/up05.sql b/schema/crdb/add-trust-quorum/up05.sql
new file mode 100644
index 00000000000..037e17aab58
--- /dev/null
+++ b/schema/crdb/add-trust-quorum/up05.sql
@@ -0,0 +1,24 @@
+-- Total group membership in trust quorum for a given epoch
+CREATE TABLE IF NOT EXISTS omicron.public.trust_quorum_member (
+    -- Foreign key into the rack table
+    -- Foreign key into the `trust_quorum_configuration` table along with `epoch`
+    rack_id UUID NOT NULL,
+
+    -- Foreign key into the `trust_quorum_configuration` table along with `rack_id`
+    epoch INT8 NOT NULL,
+
+    -- Foreign key into the `hw_baseboard_id` table
+    hw_baseboard_id UUID NOT NULL,
+
+    -- Whether a node has acknowledged a prepare or commit yet
+    state omicron.public.trust_quorum_member_state NOT NULL,
+
+    -- The sha3-256 hash of the key share for this node. This is only filled in
+    -- after Nexus has retrieved the configuration from the coordinator during
+    -- the prepare phase of the protocol.
+    --
+    -- Hex formatted string
+    share_digest STRING(64),
+
+    PRIMARY KEY (rack_id, epoch, hw_baseboard_id)
+);
diff --git a/schema/crdb/add-trust-quorum/up08.sql b/schema/crdb/add-trust-quorum/up08.sql
new file mode 100644
index 00000000000..3013cae9cb0
--- /dev/null
+++ b/schema/crdb/add-trust-quorum/up08.sql
@@ -0,0 +1,24 @@
+-- Ensure that each sled always has a `hw_baseboard_id`.
+--
+-- It would be weird if this wasn't true, but we want to guarantee it before
+-- upgrade from LRTQ to TQ.
+INSERT INTO omicron.public.hw_baseboard_id
+    (id, part_number, serial_number)
+    SELECT
+        gen_random_uuid(), part_number, serial_number
+    FROM omicron.public.sled as sled
+    ON CONFLICT DO NOTHING;
+
+
+-- Put all `hw_baseboard_id`s for non-expunged sleds into `lrtq_member`
+INSERT INTO omicron.public.lrtq_member
+    (rack_id, hw_baseboard_id)
+    SELECT
+        sled.rack_id, hw.id
+    FROM omicron.public.sled as sled
+    INNER JOIN omicron.public.hw_baseboard_id as hw
+    ON
+        sled.part_number = hw.part_number
+        AND sled.serial_number = hw.serial_number
+        AND sled.sled_policy != 'expunged'
+ON CONFLICT DO NOTHING;
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index f0f5fd148a8..ee2f52e4991 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -7352,6 +7352,126 @@ CREATE INDEX IF NOT EXISTS multicast_member_parent_state ON omicron.public.multi
     state
 ) WHERE time_deleted IS NULL;
 
+-- An LRTQ configuration explicitly placed in the database via a DB migration
+--
+-- LRTQ configurations are always epoch 1, and any subsequent trust quorum
+-- configuration must have epoch > 1.
+CREATE TABLE IF NOT EXISTS omicron.public.lrtq_member (
+    -- Foreign key into the rack table
+    rack_id UUID NOT NULL,
+
+    -- Foreign key into the `hw_baseboard_id` table
+    -- A sled can only be in one rack, hence the UNIQUE constraint.
+    hw_baseboard_id UUID NOT NULL UNIQUE,
+
+    PRIMARY KEY (rack_id, hw_baseboard_id)
+);
+
+-- The state of a given trust quorum configuration
+CREATE TYPE IF NOT EXISTS omicron.public.trust_quorum_configuration_state AS ENUM (
+    -- Nexus is waiting for prepare acknowledgments by polling the coordinator
+    -- These may come as part of a reconfiguration or LRTQ upgrade
+    'preparing',
+    -- The configuration has committed to the database, and nexus may still be
+    -- trying to inform nodes about the commit.
+    'committed',
+    -- The configuration has aborted and will not commit. The epoch can be
+    -- skipped.
+ 'aborted' +); + +-- Information for tracking trust quorum memberships over time +CREATE TABLE IF NOT EXISTS omicron.public.trust_quorum_configuration ( + -- Foreign key into the rack table + rack_id UUID NOT NULL, + + -- Monotonically increasing version per rack_id + epoch INT8 NOT NULL, + + -- The current state of this configuration + state omicron.public.trust_quorum_configuration_state NOT NULL, + + -- The number of shares needed to compute the rack secret + -- + -- In some documentation we call this the `K` parameter. + threshold INT2 NOT NULL CHECK (threshold > 0), + + -- The number of additional nodes beyond threshold to commit + -- + -- This represents the number of prepared nodes that can be offline after + -- a commit at Nexus and still allow the secret to be reconstructed during + -- rack unlock. If this number is equivalent to the total membership (`N`) + -- minus `threshold` nodes, then all nodes in the membership set for this + -- epoch must ack a prepare for a commit to occur. By varying this value we + -- allow commit to occur even if some nodes haven't prepared, thus providing + -- fault tolerance during the prepare phase and also during unlock. + -- + -- In some documentation we call this the `Z` parameter. + commit_crash_tolerance INT2 NOT NULL CHECK (commit_crash_tolerance >= 0), + + -- Which member is coordinating the prepare phase of the protocol this epoch + -- Foreign key into the `hw_baseboard_id` table + coordinator UUID NOT NULL, + + -- Encrypted rack secrets for prior committed epochs + -- + -- These are only filled in during a reconfiguration and retrieved + -- during the prepare phase of the protocol by Nexus from the coordinator. 
+    --
+    -- Salt is a hex-encoded string
+    -- The CHECK constraint below ensures both are null or both are not null
+    encrypted_rack_secrets_salt STRING(64),
+    encrypted_rack_secrets BYTES,
+
+    CONSTRAINT encrypted_rack_secrets_both_or_neither_null CHECK (
+        (encrypted_rack_secrets_salt IS NULL
+            AND encrypted_rack_secrets IS NULL)
+        OR
+        (encrypted_rack_secrets_salt IS NOT NULL
+            AND encrypted_rack_secrets IS NOT NULL)
+    ),
+
+    -- Each rack has its own trust quorum
+    PRIMARY KEY (rack_id, epoch)
+);
+
+-- Whether a node has prepared or committed yet
+CREATE TYPE IF NOT EXISTS omicron.public.trust_quorum_member_state AS ENUM (
+    -- The node has not acknowledged either a `Prepare` or `Commit` message
+    'unacked',
+    -- The node has acknowledged a `Prepare` message
+    'prepared',
+    -- The node has acknowledged a `Commit` or `PrepareAndCommit` message
+    -- `committed` implies `prepared`
+    'committed'
+);
+
+-- Total group membership in trust quorum for a given epoch
+CREATE TABLE IF NOT EXISTS omicron.public.trust_quorum_member (
+    -- Foreign key into the rack table
+    -- Foreign key into the `trust_quorum_configuration` table along with `epoch`
+    rack_id UUID NOT NULL,
+
+    -- Foreign key into the `trust_quorum_configuration` table along with `rack_id`
+    epoch INT8 NOT NULL,
+
+    -- Foreign key into the `hw_baseboard_id` table
+    hw_baseboard_id UUID NOT NULL,
+
+    -- Whether a node has acknowledged a prepare or commit yet
+    state omicron.public.trust_quorum_member_state NOT NULL,
+
+    -- The sha3-256 hash of the key share for this node. This is only filled in
+    -- after Nexus has retrieved the configuration from the coordinator during
+    -- the prepare phase of the protocol.
+    --
+    -- Hex formatted string
+    share_digest STRING(64),
+
+    PRIMARY KEY (rack_id, epoch, hw_baseboard_id)
+);
+
+
 -- Keep this at the end of file so that the database does not contain a version
 -- until it is fully populated.
 INSERT INTO omicron.public.db_metadata (
@@ -7361,7 +7481,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    (TRUE, NOW(), NOW(), '211.0.0', NULL)
+    (TRUE, NOW(), NOW(), '212.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;
diff --git a/trust-quorum/protocol/src/crypto.rs b/trust-quorum/protocol/src/crypto.rs
index 84ba89c4691..e56eb143cd1 100644
--- a/trust-quorum/protocol/src/crypto.rs
+++ b/trust-quorum/protocol/src/crypto.rs
@@ -306,8 +306,8 @@ impl Default for Salt {
 pub struct EncryptedRackSecrets {
     /// A random value used to derive the key to encrypt the rack secrets for
     /// prior committed epochs.
-    salt: Salt,
-    data: Box<[u8]>,
+    pub salt: Salt,
+    pub data: Box<[u8]>,
 }
 
 #[derive(
diff --git a/trust-quorum/protocol/src/lib.rs b/trust-quorum/protocol/src/lib.rs
index 44f0d75379c..5753f35dd83 100644
--- a/trust-quorum/protocol/src/lib.rs
+++ b/trust-quorum/protocol/src/lib.rs
@@ -9,7 +9,6 @@
 //! All persistent state and all networking is managed outside of this
 //! implementation.
 
-use crypto::Sha3_256Digest;
 use daft::Diffable;
 use derive_more::Display;
 use gfss::shamir::Share;
@@ -42,7 +41,10 @@ pub use validators::{
 };
 
 pub use alarm::Alarm;
-pub use crypto::{RackSecret, ReconstructedRackSecret};
+pub use crypto::{
+    EncryptedRackSecrets, RackSecret, ReconstructedRackSecret, Salt,
+    Sha3_256Digest,
+};
 pub use messages::*;
 pub use node::{CommitError, Node, NodeDiff, PrepareAndCommitError};
 // public only for docs.
@@ -73,6 +75,10 @@ impl Epoch {
     pub fn next(&self) -> Epoch {
         Epoch(self.0.checked_add(1).expect("fewer than 2^64 epochs"))
     }
+
+    pub fn previous(&self) -> Option<Epoch> {
+        self.0.checked_sub(1).map(Epoch)
+    }
 }
 
 /// The number of shares required to reconstruct the rack secret