Skip to content

Commit

Permalink
Include timeline region in the smgr metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
ctring committed Apr 25, 2023
1 parent f711291 commit 9d9bac9
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 48 deletions.
7 changes: 6 additions & 1 deletion pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,12 @@ pub static SMGR_QUERY_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
"pageserver_smgr_query_seconds",
"Time spent on smgr query handling",
&["smgr_query_type", "tenant_id", "timeline_id"],
&[
"smgr_query_type",
"tenant_id",
"timeline_id",
"timeline_region"
],
SMGR_QUERY_TIME_BUCKETS.into()
)
.expect("failed to define a metric")
Expand Down
131 changes: 84 additions & 47 deletions pageserver/src/page_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,27 +238,52 @@ struct PageRequestMetrics {
}

impl PageRequestMetrics {
fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self {
fn new(tenant_id: &TenantId, timeline_id: &TimelineId, timeline_region: RegionId) -> Self {
let tenant_id = tenant_id.to_string();
let timeline_id = timeline_id.to_string();
let timeline_region = timeline_region.to_string();

let get_rel_exists =
SMGR_QUERY_TIME.with_label_values(&["get_rel_exists", &tenant_id, &timeline_id]);
let get_rel_exists = SMGR_QUERY_TIME.with_label_values(&[
"get_rel_exists",
&tenant_id,
&timeline_id,
&timeline_region,
]);

let get_rel_size =
SMGR_QUERY_TIME.with_label_values(&["get_rel_size", &tenant_id, &timeline_id]);
let get_rel_size = SMGR_QUERY_TIME.with_label_values(&[
"get_rel_size",
&tenant_id,
&timeline_id,
&timeline_region,
]);

let get_page_at_lsn =
SMGR_QUERY_TIME.with_label_values(&["get_page_at_lsn", &tenant_id, &timeline_id]);
let get_page_at_lsn = SMGR_QUERY_TIME.with_label_values(&[
"get_page_at_lsn",
&tenant_id,
&timeline_id,
&timeline_region,
]);

let get_db_size =
SMGR_QUERY_TIME.with_label_values(&["get_db_size", &tenant_id, &timeline_id]);
let get_db_size = SMGR_QUERY_TIME.with_label_values(&[
"get_db_size",
&tenant_id,
&timeline_id,
&timeline_region,
]);

let get_slru_page =
SMGR_QUERY_TIME.with_label_values(&["get_slru_page", &tenant_id, &timeline_id]);
let get_slru_page = SMGR_QUERY_TIME.with_label_values(&[
"get_slru_page",
&tenant_id,
&timeline_id,
&timeline_region,
]);

let get_latest_lsn =
SMGR_QUERY_TIME.with_label_values(&["get_latest_lsn", &tenant_id, &timeline_id]);
let get_latest_lsn = SMGR_QUERY_TIME.with_label_values(&[
"get_latest_lsn",
&tenant_id,
&timeline_id,
&timeline_region,
]);

Self {
get_rel_exists,
Expand Down Expand Up @@ -331,22 +356,23 @@ impl PageServerHandler {
get_timelines_indexed_by_region_id(&tenant)?
};

// Remotexact
let main_timeline = get_timeline_by_region_id(&timelines, RegionId(0)).unwrap();

// switch client to COPYBOTH
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
pgb.flush().await?;

let empty_timeline_id = TimelineId::from([0u8; 16]);
let metrics = PageRequestMetrics::new(
&tenant_id,
if let Some(ref id) = timeline_id {
id
} else {
&empty_timeline_id
},
);
let metrics = timelines
.iter()
.map(|(region_id, timeline)| {
(
*region_id,
PageRequestMetrics::new(&tenant_id, &timeline.timeline_id, timeline.region_id),
)
})
.collect::<HashMap<_, _>>();

// Remotexact
let (main_timeline, _) =
get_timeline_and_metrics_by_region_id(&timelines, &metrics, RegionId(0)).unwrap();

loop {
let msg = tokio::select! {
Expand Down Expand Up @@ -392,9 +418,9 @@ impl PageServerHandler {
// the data added to the relation prior to the move.
let response = match neon_fe_msg {
PagestreamFeMessage::Exists(mut req) => {
let _timer = metrics.get_rel_exists.start_timer();
match get_timeline_by_region_id(&timelines, req.region) {
Ok(timeline) => {
match get_timeline_and_metrics_by_region_id(&timelines, &metrics, req.region) {
Ok((timeline, metrics)) => {
let _timer = metrics.get_rel_exists.start_timer();
match self
.handle_get_rel_exists_request(timeline.as_ref(), &req, &ctx)
.await
Expand All @@ -412,9 +438,9 @@ impl PageServerHandler {
}
}
PagestreamFeMessage::Nblocks(mut req) => {
let _timer = metrics.get_rel_size.start_timer();
match get_timeline_by_region_id(&timelines, req.region) {
Ok(timeline) => {
match get_timeline_and_metrics_by_region_id(&timelines, &metrics, req.region) {
Ok((timeline, metrics)) => {
let _timer = metrics.get_rel_size.start_timer();
match self.handle_get_nblocks_request(&timeline, &req, &ctx).await {
res @ Ok(_) => res,
Err(_) => {
Expand All @@ -429,9 +455,9 @@ impl PageServerHandler {
}
}
PagestreamFeMessage::GetPage(mut req) => {
let _timer = metrics.get_page_at_lsn.start_timer();
match get_timeline_by_region_id(&timelines, req.region) {
Ok(timeline) => {
match get_timeline_and_metrics_by_region_id(&timelines, &metrics, req.region) {
Ok((timeline, metrics)) => {
let _timer = metrics.get_page_at_lsn.start_timer();
match self
.handle_get_page_at_lsn_request(&timeline, &req, &ctx)
.await
Expand All @@ -449,26 +475,30 @@ impl PageServerHandler {
}
}
PagestreamFeMessage::DbSize(req) => {
let _timer = metrics.get_db_size.start_timer();
match get_timeline_by_region_id(&timelines, req.region) {
Ok(timeline) => self.handle_db_size_request(&timeline, &req, &ctx).await,
match get_timeline_and_metrics_by_region_id(&timelines, &metrics, req.region) {
Ok((timeline, metrics)) => {
let _timer = metrics.get_db_size.start_timer();
self.handle_db_size_request(&timeline, &req, &ctx).await
}
Err(e) => Err(e),
}
}
PagestreamFeMessage::GetSlruPage(req) => {
let _timer = metrics.get_slru_page.start_timer();
match get_timeline_by_region_id(&timelines, req.region) {
Ok(timeline) => {
match get_timeline_and_metrics_by_region_id(&timelines, &metrics, req.region) {
Ok((timeline, metrics)) => {
let _timer = metrics.get_slru_page.start_timer();
self.handle_get_slru_page_at_lsn_request(&timeline, &req, &ctx)
.await
}
Err(e) => Err(e),
}
}
PagestreamFeMessage::GetLatestLsn(req) => {
let _timer = metrics.get_latest_lsn.start_timer();
match get_timeline_by_region_id(&timelines, req.region) {
Ok(timeline) => self.handle_get_latest_lsn_request(&timeline, &ctx).await,
match get_timeline_and_metrics_by_region_id(&timelines, &metrics, req.region) {
Ok((timeline, metrics)) => {
let _timer = metrics.get_latest_lsn.start_timer();
self.handle_get_latest_lsn_request(&timeline, &ctx).await
}
Err(e) => Err(e),
}
}
Expand Down Expand Up @@ -1304,12 +1334,19 @@ fn get_timelines_indexed_by_region_id(
Ok(map)
}

// NOTE(review): this span is a rendered diff without +/- markers. The
// `get_timeline_by_region_id` signature lines (taking a single `index` map and
// returning only the timeline) appear to be the removed version; the
// `get_timeline_and_metrics_by_region_id` lines appear to be the added one.
fn get_timeline_by_region_id(
index: &HashMap<RegionId, Arc<Timeline>>,
// Resolves `region_id` to its timeline together with that region's request
// metrics.
//
// Both maps are keyed by `RegionId`. The timeline is returned as an owned
// `Arc<Timeline>`, while the metrics are returned as a reference borrowed from
// `metrics_index` (lifetime `'a`).
//
// Returns an error when `region_id` is missing from `timeline_index`
// ("region {} does not exists") or from `metrics_index`
// ("region {} does not have metrics"). The timeline lookup is attempted first,
// so its error takes precedence when both entries are missing.
fn get_timeline_and_metrics_by_region_id<'a>(
timeline_index: &HashMap<RegionId, Arc<Timeline>>,
metrics_index: &'a HashMap<RegionId, PageRequestMetrics>,
region_id: RegionId,
) -> anyhow::Result<Arc<Timeline>> {
index
) -> anyhow::Result<(Arc<Timeline>, &'a PageRequestMetrics)> {
timeline_index
.get(&region_id)
// Turn the borrowed map entry into an owned `Arc<Timeline>` handle.
.map(Arc::to_owned)
// NOTE(review): the message reads "does not exists"; left as-is here because
// changing a runtime string would change behavior.
.ok_or_else(|| anyhow::anyhow!("region {} does not exists", region_id))
// Only consult the metrics map once the timeline lookup has succeeded.
.and_then(|timeline| {
metrics_index
.get(&region_id)
.ok_or_else(|| anyhow::anyhow!("region {} does not have metrics", region_id))
// Pair the owned timeline with the borrowed metrics for the caller.
.map(|metrics| (timeline, metrics))
})
}

0 comments on commit 9d9bac9

Please sign in to comment.