Skip to content

Commit

Permalink
chore: average alerts across namespace for 1 hour (#10827)
Browse files Browse the repository at this point in the history
We now require the *average* increase in proven block height across a
namespace to be 0 for an *hour* before a Slack alert is triggered.
  • Loading branch information
just-mitch authored Dec 17, 2024
1 parent 4ac13e6 commit 962a7a2
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions spartan/metrics/terraform/grafana.tf
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ resource "grafana_mute_timing" "mute_timing_always" {
}
}

resource "grafana_rule_group" "rule_group_minutely" {
resource "grafana_rule_group" "rule_group_hourly" {
org_id = 1
name = "minutely-evaluation-group"
name = "hourly-evaluation-group"
folder_uid = grafana_folder.rule_folder.uid
interval_seconds = 60
interval_seconds = 3600

rule {
name = "Proven Chain is Live"
Expand All @@ -81,7 +81,7 @@ resource "grafana_rule_group" "rule_group_minutely" {
model = jsonencode({
disableTextWrap = false,
editorMode = "code",
expr = "increase(aztec_archiver_block_height{aztec_status=\"proven\"}[30m])",
expr = "avg by(k8s_namespace_name) (increase(aztec_archiver_block_height{aztec_status=\"proven\"}[60m]))",
fullMetaSearch = false,
includeNullMetadata = true,
instant = true,
Expand Down Expand Up @@ -118,15 +118,15 @@ resource "grafana_rule_group" "rule_group_minutely" {
expression = "A",
intervalMs = 1000,
maxDataPoints = 43200,
refId = "C",
refId = "B",
type = "threshold"
}
)
}

no_data_state = "NoData"
exec_err_state = "Error"
for = "1m"
for = "1h"
annotations = {}
labels = {}
is_paused = false
Expand Down

0 comments on commit 962a7a2

Please sign in to comment.