diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 15e674772654..d9feeeb3db23 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -65,6 +65,26 @@ pub static STORAGE_TIME_GLOBAL: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static LOOKUP_CACHE_TIME: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_getpage_lookup_cache_seconds", + "Time spent in lookup_cached_page", + &["tenant_id", "timeline_id"], + get_buckets_for_critical_operations(), + ) + .expect("failed to define a metric") +}); + +static GET_RECONSTRUCT_DATA_TIME: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_getpage_get_reconstruct_data_seconds", + "Time spent in get_reconstruct_data", + &["tenant_id", "timeline_id"], + get_buckets_for_critical_operations(), + ) + .expect("failed to define a metric") +}); + // Metrics collected on operations on the storage repository. static RECONSTRUCT_TIME: Lazy = Lazy::new(|| { register_histogram_vec!( @@ -493,6 +513,8 @@ impl StorageTimeMetrics { pub struct TimelineMetrics { tenant_id: String, timeline_id: String, + pub lookup_cache_time_histo: Histogram, + pub get_reconstruct_data_time_histo: Histogram, pub reconstruct_time_histo: Histogram, pub materialized_page_cache_hit_counter: GenericCounter, pub flush_time_histo: StorageTimeMetrics, @@ -515,6 +537,12 @@ impl TimelineMetrics { pub fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self { let tenant_id = tenant_id.to_string(); let timeline_id = timeline_id.to_string(); + let lookup_cache_time_histo = LOOKUP_CACHE_TIME + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); + let get_reconstruct_data_time_histo = GET_RECONSTRUCT_DATA_TIME + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); let reconstruct_time_histo = RECONSTRUCT_TIME .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); @@ -553,6 +581,8 @@ impl TimelineMetrics { TimelineMetrics { tenant_id, timeline_id, + lookup_cache_time_histo, + get_reconstruct_data_time_histo, reconstruct_time_histo, materialized_page_cache_hit_counter, flush_time_histo, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 13dc0d1ddfcc..08b603e03f32 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -497,6 +497,7 @@ impl Timeline { // The cached image can be returned directly if there is no WAL between the cached image // and requested LSN. The cached image can also be used to reduce the amount of WAL needed // for redo. + let timer = self.metrics.lookup_cache_time_histo.start_timer(); let cached_page_img = match self.lookup_cached_page(&key, lsn) { Some((cached_lsn, cached_img)) => { match cached_lsn.cmp(&lsn) { @@ -510,14 +511,17 @@ impl Timeline { } None => None, }; + timer.stop_and_record(); let mut reconstruct_state = ValueReconstructState { records: Vec::new(), img: cached_page_img, }; + let timer = self.metrics.get_reconstruct_data_time_histo.start_timer(); self.get_reconstruct_data(key, lsn, &mut reconstruct_state, ctx) .await?; + timer.stop_and_record(); self.metrics .reconstruct_time_histo