Add metrics tracking time to restore a database from disk (#3285)

gefjon · web-flow · commit 4521487f804b · 2025-09-24T17:44:23.000Z
# Description of Changes

Inspired by a question @Shubham8287 asked me: how long does it take to replay real production databases, e.g. a BitCraft region module? I did not have a good answer, and didn't know how I could get an answer.

Add 5 new metrics:

- `replay_total_time_seconds(db: Identity)`, total time to read and restore snapshot and replay commitlog.
- `replay_snapshot_read_time_seconds(db: Identity)`, time to read snapshot from disk.
- `replay_snapshot_restore_time_seconds(db: Identity)`, time to restore snapshot once it's already in memory.
- `replay_commitlog_time_seconds(db: Identity)`, time to replay commitlog once snapshot is restored.
- `replay_commitlog_num_commits(db: Identity)`, number of commits visited during commitlog replay, i.e. the length of the suffix after the most recent usable snapshot.

All of these are `Gauge`s (or `IntGauge` for the `num_commits` metric), as we expect observations to be very infrequent (approx. once per label value per SpacetimeDB process).

# API and ABI breaking changes

N/A

# Expected complexity level and risk

1

# Testing

I remain unsure how to test metrics. This won't break anything, though, so I'm not worried.
diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs
@@ -3,6 +3,7 @@ use crate::error::{DBError, DatabaseError, RestoreSnapshotError};
 use crate::messages::control_db::HostType;
 use crate::subscription::ExecutionCounters;
 use crate::util::{asyncify, spawn_rayon};
+use crate::worker_metrics::WORKER_METRICS;
 use anyhow::{anyhow, Context};
 use enum_map::EnumMap;
 use fs2::FileExt;
@@ -378,6 +379,9 @@ impl RelationalDB {
         let (min_commitlog_offset, _) = history.tx_range_hint();
 
         log::info!("[{database_identity}] DATABASE: durable_tx_offset is {durable_tx_offset:?}");
+
+        let start_time = std::time::Instant::now();
+
         let inner = Self::restore_from_snapshot_or_bootstrap(
             database_identity,
             snapshot_repo.as_deref(),
@@ -387,6 +391,13 @@ impl RelationalDB {
         )?;
 
         apply_history(&inner, database_identity, history)?;
+
+        let elapsed_time = start_time.elapsed();
+        WORKER_METRICS
+            .replay_total_time_seconds
+            .with_label_values(&database_identity)
+            .set(elapsed_time.as_secs_f64());
+
         let db = Self::new(
             lock,
             database_identity,
@@ -532,13 +543,20 @@ impl RelationalDB {
         ) -> Result<ReconstructedSnapshot, Box<SnapshotError>> {
             log::info!("[{database_identity}] DATABASE: restoring snapshot of tx_offset {snapshot_offset}");
             let start = std::time::Instant::now();
+
             let snapshot = snapshot_repo
                 .read_snapshot(snapshot_offset, page_pool)
                 .map_err(Box::new)?;
+
+            let elapsed_time = start.elapsed();
+
+            WORKER_METRICS
+                .replay_snapshot_read_time_seconds
+                .with_label_values(database_identity)
+                .set(elapsed_time.as_secs_f64());
+
             log::info!(
-                "[{database_identity}] DATABASE: read snapshot of tx_offset {} in {:?}",
-                snapshot_offset,
-                start.elapsed(),
+                "[{database_identity}] DATABASE: read snapshot of tx_offset {snapshot_offset} in {elapsed_time:?}",
             );
 
             Ok(snapshot)
@@ -554,10 +572,12 @@ impl RelationalDB {
             let snapshot_offset = snapshot.tx_offset;
             Locking::restore_from_snapshot(snapshot, page_pool)
                 .inspect(|_| {
+                    let elapsed_time = start.elapsed();
+
+                    WORKER_METRICS.replay_snapshot_restore_time_seconds.with_label_values(database_identity).set(elapsed_time.as_secs_f64());
+
                     log::info!(
-                        "[{database_identity}] DATABASE: restored from snapshot of tx_offset {} in {:?}",
-                        snapshot_offset,
-                        start.elapsed(),
+                        "[{database_identity}] DATABASE: restored from snapshot of tx_offset {snapshot_offset} in {elapsed_time:?}",
                     )
                 })
                 .inspect_err(|e| {
@@ -1602,11 +1622,26 @@ where
         }
     };
 
+    let time_before = std::time::Instant::now();
+
     let mut replay = datastore.replay(progress);
-    let start = replay.next_tx_offset();
+    let start_tx_offset = replay.next_tx_offset();
     history
-        .fold_transactions_from(start, &mut replay)
+        .fold_transactions_from(start_tx_offset, &mut replay)
         .map_err(anyhow::Error::from)?;
+
+    let time_elapsed = time_before.elapsed();
+    WORKER_METRICS
+        .replay_commitlog_time_seconds
+        .with_label_values(&database_identity)
+        .set(time_elapsed.as_secs_f64());
+
+    let end_tx_offset = replay.next_tx_offset();
+    WORKER_METRICS
+        .replay_commitlog_num_commits
+        .with_label_values(&database_identity)
+        .set((end_tx_offset - start_tx_offset) as _);
+
     log::info!("[{database_identity}] DATABASE: applied transaction history");
     datastore.rebuild_state_after_replay()?;
     log::info!("[{database_identity}] DATABASE: rebuilt state after replay");
diff --git a/crates/core/src/worker_metrics/mod.rs b/crates/core/src/worker_metrics/mod.rs
@@ -284,6 +284,34 @@ metrics_group!(
         #[help = "The number of server -> client WebSocket messages waiting in any client's outgoing queue"]
         #[labels(db: Identity)]
         pub total_outgoing_queue_length: IntGaugeVec,
+
+        #[name = spacetime_replay_total_time_seconds]
+        #[help = "Total time spent replaying a database upon restart, including snapshot read, snapshot restore and commitlog replay"]
+        #[labels(db: Identity)]
+        // We expect a small number of observations per label
+        // (exactly one, for non-replicated databases, and one per leader change for replicated databases)
+        // so we'll just store a `Gauge` with the most recent observation for each database.
+        pub replay_total_time_seconds: GaugeVec,
+
+        #[name = spacetime_replay_snapshot_read_time_seconds]
+        #[help = "Time spent reading a snapshot from disk before restoring the snapshot upon restart"]
+        #[labels(db: Identity)]
+        pub replay_snapshot_read_time_seconds: GaugeVec,
+
+        #[name = spacetime_replay_snapshot_restore_time_seconds]
+        #[help = "Time spent restoring a database from a snapshot after reading the snapshot and before commitlog replay upon restart"]
+        #[labels(db: Identity)]
+        pub replay_snapshot_restore_time_seconds: GaugeVec,
+
+        #[name = spacetime_replay_commitlog_time_seconds]
+        #[help = "Time spent replaying the commitlog after restoring from a snapshot upon restart"]
+        #[labels(db: Identity)]
+        pub replay_commitlog_time_seconds: GaugeVec,
+
+        #[name = spacetime_replay_commitlog_num_commits]
+        #[help = "Number of commits replayed after restoring from a snapshot upon restart"]
+        #[labels(db: Identity)]
+        pub replay_commitlog_num_commits: IntGaugeVec,
     }
 );