Skip to content

Commit

Permalink
[Index Retention] Fix alerting for index retention (#25367)
Browse files Browse the repository at this point in the history
GitOrigin-RevId: ada561b1455d30e9f2e6fb3acfafeea8f362ee82
  • Loading branch information
jordanhunt22 authored and Convex, Inc. committed May 3, 2024
1 parent cd5777d commit 91061e6
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 6 deletions.
16 changes: 16 additions & 0 deletions crates/database/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,14 @@ pub fn log_retention_cursor_age(age_secs: f64) {
log_gauge(&RETENTION_CURSOR_AGE_SECONDS, age_secs)
}

// Gauge tracking how far the index retention cursor trails the minimum index
// snapshot timestamp, in seconds.
register_convex_gauge!(
    RETENTION_CURSOR_LAG_SECONDS,
    "Lag between the retention cursor and the min index snapshot"
);
/// Records the current index retention cursor lag on the
/// `RETENTION_CURSOR_LAG_SECONDS` gauge.
///
/// `lag_secs` is the lag in seconds. Unlike the sibling `*_cursor_age`
/// loggers, this value is a lag relative to the min index snapshot rather
/// than an age, hence the distinct parameter name.
pub fn log_retention_cursor_lag(lag_secs: f64) {
    log_gauge(&RETENTION_CURSOR_LAG_SECONDS, lag_secs)
}

register_convex_gauge!(
DOCUMENT_RETENTION_CURSOR_AGE_SECONDS,
"Age of the document retention cursor"
Expand All @@ -410,6 +418,14 @@ pub fn log_document_retention_cursor_age(age_secs: f64) {
log_gauge(&DOCUMENT_RETENTION_CURSOR_AGE_SECONDS, age_secs)
}

// Gauge tracking how far the document retention cursor trails the minimum
// document snapshot timestamp, in seconds.
register_convex_gauge!(
    DOCUMENT_RETENTION_CURSOR_LAG_SECONDS,
    "Lag between the document retention cursor and the min document snapshot"
);
/// Records the current document retention cursor lag on the
/// `DOCUMENT_RETENTION_CURSOR_LAG_SECONDS` gauge.
///
/// `lag_secs` is the lag in seconds. Unlike the sibling `*_cursor_age`
/// loggers, this value is a lag relative to the min document snapshot rather
/// than an age, hence the distinct parameter name.
pub fn log_document_retention_cursor_lag(lag_secs: f64) {
    log_gauge(&DOCUMENT_RETENTION_CURSOR_LAG_SECONDS, lag_secs)
}

register_convex_gauge!(
RETENTION_MISSING_CURSOR_INFO,
"Index retention has no cursor"
Expand Down
34 changes: 28 additions & 6 deletions crates/database/src/retention.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,11 @@ use crate::{
latest_min_document_snapshot_timer,
latest_min_snapshot_timer,
log_document_retention_cursor_age,
log_document_retention_cursor_lag,
log_document_retention_no_cursor,
log_document_retention_scanned_document,
log_retention_cursor_age,
log_retention_cursor_lag,
log_retention_documents_deleted,
log_retention_expired_index_entry,
log_retention_index_entries_deleted,
Expand Down Expand Up @@ -441,6 +443,7 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
// even if the deletion future is stuck.
Self::get_checkpoint(
persistence.reader().as_ref(),
bounds_writer.reader(),
snapshot_reader.clone(),
retention_type,
)
Expand Down Expand Up @@ -1050,6 +1053,7 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
let _timer = retention_delete_timer();
let cursor = Self::get_checkpoint(
reader.as_ref(),
bounds_reader.clone(),
snapshot_reader.clone(),
RetentionType::Index,
)
Expand Down Expand Up @@ -1179,6 +1183,7 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
let _timer = retention_delete_documents_timer();
let cursor = Self::get_checkpoint(
reader.as_ref(),
bounds_reader.clone(),
snapshot_reader.clone(),
RetentionType::Document,
)
Expand Down Expand Up @@ -1268,6 +1273,7 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {

async fn get_checkpoint(
persistence: &dyn PersistenceReader,
bounds_reader: Reader<SnapshotBounds>,
snapshot_reader: Reader<SnapshotManager>,
retention_type: RetentionType,
) -> anyhow::Result<Timestamp> {
Expand All @@ -1276,12 +1282,28 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
// Only log if the checkpoint has been written once, to avoid logging time since
// epoch when the instance is first starting up.
match retention_type {
RetentionType::Document => log_document_retention_cursor_age(
(*snapshot_reader.lock().latest_ts()).secs_since_f64(checkpoint),
),
RetentionType::Index => log_retention_cursor_age(
(*snapshot_reader.lock().latest_ts()).secs_since_f64(checkpoint),
),
RetentionType::Document => {
log_document_retention_cursor_age(
(*snapshot_reader.lock().latest_ts()).secs_since_f64(checkpoint),
);
log_document_retention_cursor_lag(
bounds_reader
.lock()
.min_document_snapshot_ts
.secs_since_f64(checkpoint),
);
},
RetentionType::Index => {
log_retention_cursor_age(
(*snapshot_reader.lock().latest_ts()).secs_since_f64(checkpoint),
);
log_retention_cursor_lag(
bounds_reader
.lock()
.min_snapshot_ts
.secs_since_f64(checkpoint),
);
},
}
} else {
match retention_type {
Expand Down

0 comments on commit 91061e6

Please sign in to comment.