From d9d3431f2654c5020d91b527900fd6fc7f85226c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alby=20Hern=C3=A1ndez?= Date: Mon, 19 Jun 2023 15:44:20 +0100 Subject: [PATCH 1/3] feat: Add exhaustion label to alerts --- slo/rules.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/slo/rules.go b/slo/rules.go index 537caeb15..5845bd733 100644 --- a/slo/rules.go +++ b/slo/rules.go @@ -120,10 +120,19 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) { } } } + + // Propagate useful SLO information to alerts' labels alertLabels["short"] = model.Duration(w.Short).String() alertLabels["long"] = model.Duration(w.Long).String() alertLabels["severity"] = string(w.Severity) + // Calculate exhaustion and propagate it as alert label + parsedLongWindowDuration, _ := model.ParseDuration(o.Window.String()) + parsedWindowDuration := time.Duration(parsedLongWindowDuration) + exhaustionCalculation := int(parsedWindowDuration.Seconds()) / int(w.Factor) + parsedExhaustionCalculation, _ := model.ParseDuration(fmt.Sprintf("%ds", exhaustionCalculation)) + alertLabels["exhaustion"] = parsedExhaustionCalculation.String() + r := monitoringv1.Rule{ Alert: o.AlertName(), // TODO: Use expr replacer @@ -205,10 +214,19 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) { } } } + + // Propagate useful SLO information to alerts' labels alertLabels["short"] = model.Duration(w.Short).String() alertLabels["long"] = model.Duration(w.Long).String() alertLabels["severity"] = string(w.Severity) + // Calculate exhaustion and propagate it as alert label + parsedLongWindowDuration, _ := model.ParseDuration(o.Window.String()) + parsedWindowDuration := time.Duration(parsedLongWindowDuration) + exhaustionCalculation := int(parsedWindowDuration.Seconds()) / int(w.Factor) + parsedExhaustionCalculation, _ := model.ParseDuration(fmt.Sprintf("%ds", exhaustionCalculation)) + alertLabels["exhaustion"] = parsedExhaustionCalculation.String() + r := monitoringv1.Rule{ Alert: o.AlertName(), // TODO: Use expr replacer @@ -290,10 +308,19 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) { } } } + + // Propagate useful SLO information to alerts' labels alertLabels["short"] = model.Duration(w.Short).String() alertLabels["long"] = model.Duration(w.Long).String() alertLabels["severity"] = string(w.Severity) + // Calculate exhaustion and propagate it as alert label + parsedLongWindowDuration, _ := model.ParseDuration(o.Window.String()) + parsedWindowDuration := time.Duration(parsedLongWindowDuration) + exhaustionCalculation := int(parsedWindowDuration.Seconds()) / int(w.Factor) + parsedExhaustionCalculation, _ := model.ParseDuration(fmt.Sprintf("%ds", exhaustionCalculation)) + alertLabels["exhaustion"] = parsedExhaustionCalculation.String() + r := monitoringv1.Rule{ Alert: o.AlertName(), // TODO: Use expr replacer @@ -375,10 +402,19 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) { } } } + + // Propagate useful SLO information to alerts' labels alertLabels["short"] = model.Duration(w.Short).String() alertLabels["long"] = model.Duration(w.Long).String() alertLabels["severity"] = string(w.Severity) + // Calculate exhaustion and propagate it as alert label + parsedLongWindowDuration, _ := model.ParseDuration(o.Window.String()) + parsedWindowDuration := time.Duration(parsedLongWindowDuration) + exhaustionCalculation := int(parsedWindowDuration.Seconds()) / int(w.Factor) + parsedExhaustionCalculation, _ := model.ParseDuration(fmt.Sprintf("%ds", exhaustionCalculation)) + alertLabels["exhaustion"] = parsedExhaustionCalculation.String() + r := monitoringv1.Rule{ Alert: o.AlertName(), // TODO: Use expr replacer From 54d689d2054ee88227c952e3444cd8c026cf4e65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alby=20Hern=C3=A1ndez?= Date: Mon, 19 Jun 2023 17:11:18 +0100 Subject: [PATCH 2/3] fix: tests with exhaustion label on alerts --- .../controllers/servicelevelobjective_test.go | 12 +- slo/rules_test.go | 144 +++++++++--------- 2 files changed, 80 insertions(+), 76 deletions(-) diff --git a/kubernetes/controllers/servicelevelobjective_test.go b/kubernetes/controllers/servicelevelobjective_test.go index 07d308e58..70568901b 100644 --- a/kubernetes/controllers/servicelevelobjective_test.go +++ b/kubernetes/controllers/servicelevelobjective_test.go @@ -155,28 +155,28 @@ func Test_makePrometheusRule(t *testing.T) { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`http_requests:burnrate5m{job="app",slo="http"} > (14 * (1-0.995)) and http_requests:burnrate1h{job="app",slo="http"} > (14 * (1-0.995))`), For: "2m", - Labels: map[string]string{"severity": "critical", "job": "app", "long": "1h", "slo": "http", "short": "5m", "team": "foo"}, + Labels: map[string]string{"severity": "critical", "job": "app", "long": "1h", "slo": "http", "short": "5m", "team": "foo", "exhaustion": "2d"}, Annotations: map[string]string{"description": "foo"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`http_requests:burnrate30m{job="app",slo="http"} > (7 * (1-0.995)) and http_requests:burnrate6h{job="app",slo="http"} > (7 * (1-0.995))`), For: "15m", - Labels: map[string]string{"severity": "critical", "job": "app", "long": "6h", "slo": "http", "short": "30m", "team": "foo"}, + Labels: map[string]string{"severity": "critical", "job": "app", "long": "6h", "slo": "http", "short": "30m", "team": "foo", "exhaustion": "4d"}, Annotations: map[string]string{"description": "foo"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`http_requests:burnrate2h{job="app",slo="http"} > (2 * (1-0.995)) and http_requests:burnrate1d{job="app",slo="http"} > (2 * (1-0.995))`), For: "1h", - Labels: map[string]string{"severity": "warning", "job": "app", "long": "1d", "slo": "http", "short": "2h", "team": "foo"}, + Labels: map[string]string{"severity": "warning", "job": "app", "long": "1d", "slo": "http", "short": "2h", "team": "foo", "exhaustion": "2w"}, Annotations: map[string]string{"description": "foo"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`http_requests:burnrate6h{job="app",slo="http"} > (1 * (1-0.995)) and http_requests:burnrate4d{job="app",slo="http"} > (1 * (1-0.995))`), For: "3h", - Labels: map[string]string{"severity": "warning", "job": "app", "long": "4d", "slo": "http", "short": "6h", "team": "foo"}, + Labels: map[string]string{"severity": "warning", "job": "app", "long": "4d", "slo": "http", "short": "6h", "team": "foo", "exhaustion": "4w"}, Annotations: map[string]string{"description": "foo"}, }, }, @@ -268,6 +268,7 @@ func Test_makeConfigMap(t *testing.T) { > (14 * (1-0.995)) for: 2m labels: + exhaustion: 2d job: app long: 1h severity: critical @@ -281,6 +282,7 @@ func Test_makeConfigMap(t *testing.T) { > (7 * (1-0.995)) for: 15m labels: + exhaustion: 4d job: app long: 6h severity: critical @@ -294,6 +296,7 @@ func Test_makeConfigMap(t *testing.T) { > (2 * (1-0.995)) for: 1h labels: + exhaustion: 2w job: app long: 1d severity: warning @@ -307,6 +310,7 @@ func Test_makeConfigMap(t *testing.T) { > (1 * (1-0.995)) for: 3h labels: + exhaustion: 4w job: app long: 4d severity: warning diff --git a/slo/rules_test.go b/slo/rules_test.go index 909562397..f7201d01b 100644 --- a/slo/rules_test.go +++ b/slo/rules_test.go @@ -52,22 +52,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "2m", Expr: intstr.FromString(`http_requests:burnrate5m{job="thanos-receive-default",slo="monitoring-http-errors"} > (14 * (1-0.99)) and http_requests:burnrate1h{job="thanos-receive-default",slo="monitoring-http-errors"} > (14 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "job": "thanos-receive-default", "long": "1h", "slo": "monitoring-http-errors", "short": "5m"}, + Labels: map[string]string{"severity": "critical", "job": "thanos-receive-default", "long": "1h", "slo": "monitoring-http-errors", "short": "5m", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "15m", Expr: intstr.FromString(`http_requests:burnrate30m{job="thanos-receive-default",slo="monitoring-http-errors"} > (7 * (1-0.99)) and http_requests:burnrate6h{job="thanos-receive-default",slo="monitoring-http-errors"} > (7 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "job": "thanos-receive-default", "long": "6h", "slo": "monitoring-http-errors", "short": "30m"}, + Labels: map[string]string{"severity": "critical", "job": "thanos-receive-default", "long": "6h", "slo": "monitoring-http-errors", "short": "30m", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", For: "1h", Expr: intstr.FromString(`http_requests:burnrate2h{job="thanos-receive-default",slo="monitoring-http-errors"} > (2 * (1-0.99)) and http_requests:burnrate1d{job="thanos-receive-default",slo="monitoring-http-errors"} > (2 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "job": "thanos-receive-default", "long": "1d", "slo": "monitoring-http-errors", "short": "2h"}, + Labels: map[string]string{"severity": "warning", "job": "thanos-receive-default", "long": "1d", "slo": "monitoring-http-errors", "short": "2h", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", For: "3h", Expr: intstr.FromString(`http_requests:burnrate6h{job="thanos-receive-default",slo="monitoring-http-errors"} > (1 * (1-0.99)) and http_requests:burnrate4d{job="thanos-receive-default",slo="monitoring-http-errors"} > (1 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "job": "thanos-receive-default", "long": "4d", "slo": "monitoring-http-errors", "short": "6h"}, + Labels: map[string]string{"severity": "warning", "job": "thanos-receive-default", "long": "4d", "slo": "monitoring-http-errors", "short": "6h", "exhaustion": "4w"}, }}, }, }, { @@ -108,22 +108,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "2m", Expr: intstr.FromString(`http_requests:burnrate5m{job="thanos-receive-default",slo="monitoring-http-errors"} > (14 * (1-0.99)) and http_requests:burnrate1h{job="thanos-receive-default",slo="monitoring-http-errors"} > (14 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "1h", "slo": "monitoring-http-errors", "short": "5m"}, + Labels: map[string]string{"severity": "critical", "long": "1h", "slo": "monitoring-http-errors", "short": "5m", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "15m", Expr: intstr.FromString(`http_requests:burnrate30m{job="thanos-receive-default",slo="monitoring-http-errors"} > (7 * (1-0.99)) and http_requests:burnrate6h{job="thanos-receive-default",slo="monitoring-http-errors"} > (7 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "monitoring-http-errors", "short": "30m"}, + Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "monitoring-http-errors", "short": "30m", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", For: "1h", Expr: intstr.FromString(`http_requests:burnrate2h{job="thanos-receive-default",slo="monitoring-http-errors"} > (2 * (1-0.99)) and http_requests:burnrate1d{job="thanos-receive-default",slo="monitoring-http-errors"} > (2 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "monitoring-http-errors", "short": "2h"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "monitoring-http-errors", "short": "2h", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", For: "3h", Expr: intstr.FromString(`http_requests:burnrate6h{job="thanos-receive-default",slo="monitoring-http-errors"} > (1 * (1-0.99)) and http_requests:burnrate4d{job="thanos-receive-default",slo="monitoring-http-errors"} > (1 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "monitoring-http-errors", "short": "6h"}, + Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "monitoring-http-errors", "short": "6h", "exhaustion": "4w"}, }}, }, }, { @@ -164,22 +164,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "2m", Expr: intstr.FromString(`http_requests:burnrate5m{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"} > (14 * (1-0.99)) and http_requests:burnrate1h{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"} > (14 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "1h", "short": "5m", "slo": "monitoring-http-errors"}, + Labels: map[string]string{"severity": "critical", "long": "1h", "short": "5m", "slo": "monitoring-http-errors", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "15m", Expr: intstr.FromString(`http_requests:burnrate30m{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"} > (7 * (1-0.99)) and http_requests:burnrate6h{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"} > (7 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "monitoring-http-errors", "short": "30m"}, + Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "monitoring-http-errors", "short": "30m", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", For: "1h", Expr: intstr.FromString(`http_requests:burnrate2h{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"} > (2 * (1-0.99)) and http_requests:burnrate1d{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"} > (2 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "monitoring-http-errors", "short": "2h"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "monitoring-http-errors", "short": "2h", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", For: "3h", Expr: intstr.FromString(`http_requests:burnrate6h{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"} > (1 * (1-0.99)) and http_requests:burnrate4d{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"} > (1 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "monitoring-http-errors", "short": "6h"}, + Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "monitoring-http-errors", "short": "6h", "exhaustion": "4w"}, }}, }, }, { @@ -220,22 +220,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handled:burnrate5m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (14 * (1-0.999)) and grpc_server_handled:burnrate1h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (14 * (1-0.999))`), For: "2m", - Labels: map[string]string{"severity": "critical", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "job": "api", "slo": "monitoring-grpc-errors", "short": "5m", "long": "1h"}, + Labels: map[string]string{"severity": "critical", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "job": "api", "slo": "monitoring-grpc-errors", "short": "5m", "long": "1h", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handled:burnrate30m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (7 * (1-0.999)) and grpc_server_handled:burnrate6h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (7 * (1-0.999))`), For: "15m", - Labels: map[string]string{"severity": "critical", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "job": "api", "slo": "monitoring-grpc-errors", "short": "30m", "long": "6h"}, + Labels: map[string]string{"severity": "critical", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "job": "api", "slo": "monitoring-grpc-errors", "short": "30m", "long": "6h", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handled:burnrate2h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (2 * (1-0.999)) and grpc_server_handled:burnrate1d{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (2 * (1-0.999))`), For: "1h", - Labels: map[string]string{"severity": "warning", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "job": "api", "slo": "monitoring-grpc-errors", "short": "2h", "long": "1d"}, + Labels: map[string]string{"severity": "warning", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "job": "api", "slo": "monitoring-grpc-errors", "short": "2h", "long": "1d", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handled:burnrate6h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (1 * (1-0.999)) and grpc_server_handled:burnrate4d{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (1 * (1-0.999))`), For: "3h", - Labels: map[string]string{"severity": "warning", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "job": "api", "slo": "monitoring-grpc-errors", "short": "6h", "long": "4d"}, + Labels: map[string]string{"severity": "warning", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "job": "api", "slo": "monitoring-grpc-errors", "short": "6h", "long": "4d", "exhaustion": "4w"}, }}, }, }, { @@ -276,22 +276,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handled:burnrate5m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (14 * (1-0.999)) and grpc_server_handled:burnrate1h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (14 * (1-0.999))`), For: "2m", - Labels: map[string]string{"severity": "critical", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "slo": "monitoring-grpc-errors", "short": "5m", "long": "1h"}, + Labels: map[string]string{"severity": "critical", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "slo": "monitoring-grpc-errors", "short": "5m", "long": "1h", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handled:burnrate30m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (7 * (1-0.999)) and grpc_server_handled:burnrate6h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (7 * (1-0.999))`), For: "15m", - Labels: map[string]string{"severity": "critical", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "slo": "monitoring-grpc-errors", "short": "30m", "long": "6h"}, + Labels: map[string]string{"severity": "critical", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "slo": "monitoring-grpc-errors", "short": "30m", "long": "6h", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handled:burnrate2h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (2 * (1-0.999)) and grpc_server_handled:burnrate1d{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (2 * (1-0.999))`), For: "1h", - Labels: map[string]string{"severity": "warning", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "slo": "monitoring-grpc-errors", "short": "2h", "long": "1d"}, + Labels: map[string]string{"severity": "warning", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "slo": "monitoring-grpc-errors", "short": "2h", "long": "1d", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handled:burnrate6h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (1 * (1-0.999)) and grpc_server_handled:burnrate4d{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"} > (1 * (1-0.999))`), For: "3h", - Labels: map[string]string{"severity": "warning", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "slo": "monitoring-grpc-errors", "short": "6h", "long": "4d"}, + Labels: map[string]string{"severity": "warning", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "slo": "monitoring-grpc-errors", "short": "6h", "long": "4d", "exhaustion": "4w"}, }}, }, }, { @@ -332,22 +332,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "2m", Expr: intstr.FromString(`http_request_duration_seconds:burnrate5m{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (14 * (1-0.995)) and http_request_duration_seconds:burnrate1h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (14 * (1-0.995))`), - Labels: map[string]string{"severity": "critical", "long": "1h", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "5m"}, + Labels: map[string]string{"severity": "critical", "long": "1h", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "5m", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "15m", Expr: intstr.FromString(`http_request_duration_seconds:burnrate30m{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (7 * (1-0.995)) and http_request_duration_seconds:burnrate6h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (7 * (1-0.995))`), - Labels: map[string]string{"severity": "critical", "long": "6h", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "30m"}, + Labels: map[string]string{"severity": "critical", "long": "6h", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "30m", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", For: "1h", Expr: intstr.FromString(`http_request_duration_seconds:burnrate2h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (2 * (1-0.995)) and http_request_duration_seconds:burnrate1d{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (2 * (1-0.995))`), - Labels: map[string]string{"severity": "warning", "long": "1d", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "2h"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "2h", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", For: "3h", Expr: intstr.FromString(`http_request_duration_seconds:burnrate6h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (1 * (1-0.995)) and http_request_duration_seconds:burnrate4d{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (1 * (1-0.995))`), - Labels: map[string]string{"severity": "warning", "long": "4d", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "6h"}, + Labels: map[string]string{"severity": "warning", "long": "4d", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "6h", "exhaustion": "4w"}, }}, }, }, { @@ -388,22 +388,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "2m", Expr: intstr.FromString(`http_request_duration_seconds:burnrate5m{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (14 * (1-0.995)) and http_request_duration_seconds:burnrate1h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (14 * (1-0.995))`), - Labels: map[string]string{"severity": "critical", "long": "1h", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "5m"}, + Labels: map[string]string{"severity": "critical", "long": "1h", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "5m", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "15m", Expr: intstr.FromString(`http_request_duration_seconds:burnrate30m{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (7 * (1-0.995)) and http_request_duration_seconds:burnrate6h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (7 * (1-0.995))`), - Labels: map[string]string{"severity": "critical", "long": "6h", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "30m"}, + Labels: map[string]string{"severity": "critical", "long": "6h", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "30m", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", For: "1h", Expr: intstr.FromString(`http_request_duration_seconds:burnrate2h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (2 * (1-0.995)) and http_request_duration_seconds:burnrate1d{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (2 * (1-0.995))`), - Labels: map[string]string{"severity": "warning", "long": "1d", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "2h"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "2h", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", For: "3h", Expr: intstr.FromString(`http_request_duration_seconds:burnrate6h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (1 * (1-0.995)) and http_request_duration_seconds:burnrate4d{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (1 * (1-0.995))`), - Labels: map[string]string{"severity": "warning", "long": "4d", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "6h"}, + Labels: map[string]string{"severity": "warning", "long": "4d", "job": "metrics-service-thanos-receive-default", "slo": "monitoring-http-latency", "short": "6h", "exhaustion": "4w"}, }}, }, }, { @@ -444,22 +444,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "2m", Expr: intstr.FromString(`http_request_duration_seconds:burnrate5m{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (14 * (1-0.995)) and http_request_duration_seconds:burnrate1h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (14 * (1-0.995))`), - Labels: map[string]string{"severity": "critical", "long": "1h", "slo": "monitoring-http-latency", "short": "5m"}, + Labels: map[string]string{"severity": "critical", "long": "1h", "slo": "monitoring-http-latency", "short": "5m", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "15m", Expr: intstr.FromString(`http_request_duration_seconds:burnrate30m{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (7 * (1-0.995)) and http_request_duration_seconds:burnrate6h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (7 * (1-0.995))`), - Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "monitoring-http-latency", "short": "30m"}, + Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "monitoring-http-latency", "short": "30m", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", For: "1h", Expr: intstr.FromString(`http_request_duration_seconds:burnrate2h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (2 * (1-0.995)) and http_request_duration_seconds:burnrate1d{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (2 * (1-0.995))`), - Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "monitoring-http-latency", "short": "2h"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "monitoring-http-latency", "short": "2h", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", For: "3h", Expr: intstr.FromString(`http_request_duration_seconds:burnrate6h{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (1 * (1-0.995)) and http_request_duration_seconds:burnrate4d{job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (1 * (1-0.995))`), - Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "monitoring-http-latency", "short": "6h"}, + Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "monitoring-http-latency", "short": "6h", "exhaustion": "4w"}, }}, }, }, { @@ -500,22 +500,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`http_request_duration_seconds:burnrate5m{handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (14 * (1-0.995)) and http_request_duration_seconds:burnrate1h{handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (14 * (1-0.995))`), For: "2m", - Labels: map[string]string{"severity": "critical", "long": "1h", "short": "5m", "slo": "monitoring-http-latency"}, + Labels: map[string]string{"severity": "critical", "long": "1h", "short": "5m", "slo": "monitoring-http-latency", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`http_request_duration_seconds:burnrate30m{handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (7 * (1-0.995)) and http_request_duration_seconds:burnrate6h{handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (7 * (1-0.995))`), For: "15m", - Labels: map[string]string{"severity": "critical", "long": "6h", "short": "30m", "slo": "monitoring-http-latency"}, + Labels: map[string]string{"severity": "critical", "long": "6h", "short": "30m", "slo": "monitoring-http-latency", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`http_request_duration_seconds:burnrate2h{handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (2 * (1-0.995)) and http_request_duration_seconds:burnrate1d{handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (2 * (1-0.995))`), For: "1h", - Labels: map[string]string{"severity": "warning", "long": "1d", "short": "2h", "slo": "monitoring-http-latency"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "short": "2h", "slo": "monitoring-http-latency", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`http_request_duration_seconds:burnrate6h{handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (1 * (1-0.995)) and http_request_duration_seconds:burnrate4d{handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"} > (1 * (1-0.995))`), For: "3h", - Labels: map[string]string{"severity": "warning", "long": "4d", "short": "6h", "slo": "monitoring-http-latency"}, + Labels: map[string]string{"severity": "warning", "long": "4d", "short": "6h", "slo": "monitoring-http-latency", "exhaustion": "4w"}, }}, }, }, { @@ -556,22 +556,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handling_seconds:burnrate1m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (14 * (1-0.995)) and grpc_server_handling_seconds:burnrate15m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (14 * (1-0.995))`), For: "1m", - Labels: map[string]string{"severity": "critical", "long": "15m", "short": "1m", "slo": "monitoring-grpc-latency", "job": "api", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore"}, + Labels: map[string]string{"severity": "critical", "long": "15m", "short": "1m", "slo": "monitoring-grpc-latency", "job": "api", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "exhaustion": "12h"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handling_seconds:burnrate8m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (7 * (1-0.995)) and grpc_server_handling_seconds:burnrate1h30m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (7 * (1-0.995))`), For: "4m", - Labels: map[string]string{"severity": "critical", "long": "1h30m", "short": "8m", "slo": "monitoring-grpc-latency", "job": "api", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore"}, + Labels: map[string]string{"severity": "critical", "long": "1h30m", "short": "8m", "slo": "monitoring-grpc-latency", "job": "api", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "exhaustion": "1d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handling_seconds:burnrate30m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (2 * (1-0.995)) and grpc_server_handling_seconds:burnrate6h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (2 * (1-0.995))`), For: "15m", - Labels: map[string]string{"severity": "warning", "long": "6h", "short": "30m", "slo": "monitoring-grpc-latency", "job": "api", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore"}, + Labels: map[string]string{"severity": "warning", "long": "6h", "short": "30m", "slo": "monitoring-grpc-latency", "job": "api", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "exhaustion": "3d12h"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handling_seconds:burnrate1h30m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (1 * (1-0.995)) and grpc_server_handling_seconds:burnrate1d{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (1 * (1-0.995))`), For: "45m", - Labels: map[string]string{"severity": "warning", "long": "1d", "short": "1h30m", "slo": "monitoring-grpc-latency", "job": "api", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "short": "1h30m", "slo": "monitoring-grpc-latency", "job": "api", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "exhaustion": "1w"}, }}, }, }, { @@ -612,22 +612,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handling_seconds:burnrate1m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (14 * (1-0.995)) and grpc_server_handling_seconds:burnrate15m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (14 * (1-0.995))`), For: "1m", - Labels: map[string]string{"severity": "critical", "long": "15m", "short": "1m", "slo": "monitoring-grpc-latency", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore"}, + Labels: map[string]string{"severity": "critical", "long": "15m", "short": "1m", "slo": "monitoring-grpc-latency", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "exhaustion": "12h"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handling_seconds:burnrate8m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (7 * (1-0.995)) and grpc_server_handling_seconds:burnrate1h30m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (7 * (1-0.995))`), For: "4m", - Labels: map[string]string{"severity": "critical", "long": "1h30m", "short": "8m", "slo": "monitoring-grpc-latency", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore"}, + Labels: map[string]string{"severity": "critical", "long": "1h30m", "short": "8m", "slo": "monitoring-grpc-latency", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "exhaustion": "1d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handling_seconds:burnrate30m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (2 * (1-0.995)) and grpc_server_handling_seconds:burnrate6h{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (2 * (1-0.995))`), For: "15m", - Labels: map[string]string{"severity": "warning", "long": "6h", "short": "30m", "slo": "monitoring-grpc-latency", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore"}, + Labels: map[string]string{"severity": "warning", "long": "6h", "short": "30m", "slo": "monitoring-grpc-latency", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "exhaustion": "3d12h"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`grpc_server_handling_seconds:burnrate1h30m{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (1 * (1-0.995)) and grpc_server_handling_seconds:burnrate1d{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"} > (1 * (1-0.995))`), For: "45m", - Labels: map[string]string{"severity": "warning", "long": "1d", "short": "1h30m", "slo": "monitoring-grpc-latency", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "short": "1h30m", "slo": "monitoring-grpc-latency", "grpc_method": "Write", "grpc_service": "conprof.WritableProfileStore", "exhaustion": "1w"}, }}, }, }, { @@ -668,22 +668,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "1m", Expr: intstr.FromString(`prometheus_operator_reconcile_operations:burnrate3m{slo="monitoring-prometheus-operator-errors"} > (14 * (1-0.99)) and prometheus_operator_reconcile_operations:burnrate30m{slo="monitoring-prometheus-operator-errors"} > (14 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "30m", "slo": "monitoring-prometheus-operator-errors", "short": "3m"}, + Labels: map[string]string{"severity": "critical", "long": "30m", "slo": "monitoring-prometheus-operator-errors", "short": "3m", "exhaustion": "1d"}, }, { Alert: "ErrorBudgetBurn", For: "8m", Expr: intstr.FromString(`prometheus_operator_reconcile_operations:burnrate15m{slo="monitoring-prometheus-operator-errors"} > (7 * (1-0.99)) and prometheus_operator_reconcile_operations:burnrate3h{slo="monitoring-prometheus-operator-errors"} > (7 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "3h", "slo": "monitoring-prometheus-operator-errors", "short": "15m"}, + Labels: map[string]string{"severity": "critical", "long": "3h", "slo": "monitoring-prometheus-operator-errors", "short": "15m", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "30m", Expr: intstr.FromString(`prometheus_operator_reconcile_operations:burnrate1h{slo="monitoring-prometheus-operator-errors"} > (2 * (1-0.99)) and prometheus_operator_reconcile_operations:burnrate12h{slo="monitoring-prometheus-operator-errors"} > (2 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "12h", "slo": "monitoring-prometheus-operator-errors", "short": "1h"}, + Labels: map[string]string{"severity": "warning", "long": "12h", "slo": "monitoring-prometheus-operator-errors", "short": "1h", "exhaustion": "1w"}, }, { Alert: "ErrorBudgetBurn", For: "1h30m", Expr: intstr.FromString(`prometheus_operator_reconcile_operations:burnrate3h{slo="monitoring-prometheus-operator-errors"} > (1 * (1-0.99)) and prometheus_operator_reconcile_operations:burnrate2d{slo="monitoring-prometheus-operator-errors"} > (1 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "2d", "slo": "monitoring-prometheus-operator-errors", "short": "3h"}, + Labels: map[string]string{"severity": "warning", "long": "2d", "slo": "monitoring-prometheus-operator-errors", "short": "3h", "exhaustion": "2w"}, }}, }, }, { @@ -724,22 +724,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "1m", Expr: intstr.FromString(`prometheus_operator_reconcile_operations:burnrate3m{slo="monitoring-prometheus-operator-errors"} > (14 * (1-0.99)) and prometheus_operator_reconcile_operations:burnrate30m{slo="monitoring-prometheus-operator-errors"} > (14 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "30m", "slo": "monitoring-prometheus-operator-errors", "short": "3m"}, + Labels: map[string]string{"severity": "critical", "long": "30m", "slo": "monitoring-prometheus-operator-errors", "short": "3m", "exhaustion": "1d"}, }, { Alert: "ErrorBudgetBurn", For: "8m", Expr: intstr.FromString(`prometheus_operator_reconcile_operations:burnrate15m{slo="monitoring-prometheus-operator-errors"} > (7 * (1-0.99)) and prometheus_operator_reconcile_operations:burnrate3h{slo="monitoring-prometheus-operator-errors"} > (7 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "3h", "slo": "monitoring-prometheus-operator-errors", "short": "15m"}, + Labels: map[string]string{"severity": "critical", "long": "3h", "slo": "monitoring-prometheus-operator-errors", "short": "15m", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "30m", Expr: intstr.FromString(`prometheus_operator_reconcile_operations:burnrate1h{slo="monitoring-prometheus-operator-errors"} > (2 * (1-0.99)) and prometheus_operator_reconcile_operations:burnrate12h{slo="monitoring-prometheus-operator-errors"} > (2 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "12h", "slo": "monitoring-prometheus-operator-errors", "short": "1h"}, + Labels: map[string]string{"severity": "warning", "long": "12h", "slo": "monitoring-prometheus-operator-errors", "short": "1h", "exhaustion": "1w"}, }, { Alert: "ErrorBudgetBurn", For: "1h30m", Expr: intstr.FromString(`prometheus_operator_reconcile_operations:burnrate3h{slo="monitoring-prometheus-operator-errors"} > (1 * (1-0.99)) and prometheus_operator_reconcile_operations:burnrate2d{slo="monitoring-prometheus-operator-errors"} > (1 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "2d", "slo": "monitoring-prometheus-operator-errors", "short": "3h"}, + Labels: map[string]string{"severity": "warning", "long": "2d", "slo": "monitoring-prometheus-operator-errors", "short": "3h", "exhaustion": "2w"}, }}, }, }, { @@ -780,22 +780,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate3m{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (14 * (1-0.99)) and apiserver_request:burnrate30m{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (14 * (1-0.99))`), For: "1m", - Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-write-response-errors"}, + Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate15m{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (7 * (1-0.99)) and apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (7 * (1-0.99))`), For: "8m", - Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-write-response-errors"}, + Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate1h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (2 * (1-0.99)) and apiserver_request:burnrate12h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (2 * (1-0.99))`), For: "30m", - Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-write-response-errors"}, + Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1w"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (1 * (1-0.99)) and apiserver_request:burnrate2d{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (1 * (1-0.99))`), For: "1h30m", - Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-write-response-errors"}, + Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2w"}, }}, }, }, { @@ -872,22 +872,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request_duration_seconds:burnrate3m{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (14 * (1-0.99)) and apiserver_request_duration_seconds:burnrate30m{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (14 * (1-0.99))`), For: "1m", - Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-read-resource-latency"}, + Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-read-resource-latency", "exhaustion": "1d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request_duration_seconds:burnrate15m{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (7 * (1-0.99)) and apiserver_request_duration_seconds:burnrate3h{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (7 * (1-0.99))`), For: "8m", - Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-read-resource-latency"}, + Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-read-resource-latency", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request_duration_seconds:burnrate1h{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (2 * (1-0.99)) and apiserver_request_duration_seconds:burnrate12h{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (2 * (1-0.99))`), For: "30m", - Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-read-resource-latency"}, + Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-read-resource-latency", "exhaustion": "1w"}, }, { Alert: "ErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request_duration_seconds:burnrate3h{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (1 * (1-0.99)) and apiserver_request_duration_seconds:burnrate2d{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (1 * (1-0.99))`), For: "1h30m", - Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-read-resource-latency"}, + Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-read-resource-latency", "exhaustion": "2w"}, }}, }, }, { @@ -964,22 +964,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "APIServerLatencyErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request_duration_seconds:burnrate3m{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (14 * (1-0.99)) and apiserver_request_duration_seconds:burnrate30m{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (14 * (1-0.99))`), For: "1m", - Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-read-resource-latency"}, + Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-read-resource-latency", "exhaustion": "1d"}, }, { Alert: "APIServerLatencyErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request_duration_seconds:burnrate15m{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (7 * (1-0.99)) and apiserver_request_duration_seconds:burnrate3h{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (7 * (1-0.99))`), For: "8m", - Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-read-resource-latency"}, + Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-read-resource-latency", "exhaustion": "2d"}, }, { Alert: "APIServerLatencyErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request_duration_seconds:burnrate1h{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (2 * (1-0.99)) and apiserver_request_duration_seconds:burnrate12h{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (2 * (1-0.99))`), For: "30m", - Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-read-resource-latency"}, + Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-read-resource-latency", "exhaustion": "1w"}, }, { Alert: "APIServerLatencyErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request_duration_seconds:burnrate3h{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (1 * (1-0.99)) and apiserver_request_duration_seconds:burnrate2d{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"} > (1 * (1-0.99))`), For: "1h30m", - Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-read-resource-latency"}, + Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-read-resource-latency", "exhaustion": "2w"}, }}, }, }, { @@ -1020,22 +1020,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "2m", Expr: intstr.FromString(`up:burnrate5m{slo="up-targets"} > (14 * (1-0.99)) and up:burnrate1h{slo="up-targets"} > (14 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "1h", "short": "5m", "slo": "up-targets"}, + Labels: map[string]string{"severity": "critical", "long": "1h", "short": "5m", "slo": "up-targets", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "15m", Expr: intstr.FromString(`up:burnrate30m{slo="up-targets"} > (7 * (1-0.99)) and up:burnrate6h{slo="up-targets"} > (7 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "up-targets", "short": "30m"}, + Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "up-targets", "short": "30m", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", For: "1h", Expr: intstr.FromString(`up:burnrate2h{slo="up-targets"} > (2 * (1-0.99)) and up:burnrate1d{slo="up-targets"} > (2 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "up-targets", "short": "2h"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "up-targets", "short": "2h", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", For: "3h", Expr: intstr.FromString(`up:burnrate6h{slo="up-targets"} > (1 * (1-0.99)) and up:burnrate4d{slo="up-targets"} > (1 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "up-targets", "short": "6h"}, + Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "up-targets", "short": "6h", "exhaustion": "4w"}, }}, }, }, { @@ -1076,22 +1076,22 @@ func TestObjective_Burnrates(t *testing.T) { Alert: "ErrorBudgetBurn", For: "2m", Expr: intstr.FromString(`up:burnrate5m{instance!~"(127.0.0.1|localhost).*",slo="up-targets"} > (14 * (1-0.99)) and up:burnrate1h{instance!~"(127.0.0.1|localhost).*",slo="up-targets"} > (14 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "1h", "short": "5m", "slo": "up-targets"}, + Labels: map[string]string{"severity": "critical", "long": "1h", "short": "5m", "slo": "up-targets", "exhaustion": "2d"}, }, { Alert: "ErrorBudgetBurn", For: "15m", Expr: intstr.FromString(`up:burnrate30m{instance!~"(127.0.0.1|localhost).*",slo="up-targets"} > (7 * (1-0.99)) and up:burnrate6h{instance!~"(127.0.0.1|localhost).*",slo="up-targets"} > (7 * (1-0.99))`), - Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "up-targets", "short": "30m"}, + Labels: map[string]string{"severity": "critical", "long": "6h", "slo": "up-targets", "short": "30m", "exhaustion": "4d"}, }, { Alert: "ErrorBudgetBurn", For: "1h", Expr: intstr.FromString(`up:burnrate2h{instance!~"(127.0.0.1|localhost).*",slo="up-targets"} > (2 * (1-0.99)) and up:burnrate1d{instance!~"(127.0.0.1|localhost).*",slo="up-targets"} > (2 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "up-targets", "short": "2h"}, + Labels: map[string]string{"severity": "warning", "long": "1d", "slo": "up-targets", "short": "2h", "exhaustion": "2w"}, }, { Alert: "ErrorBudgetBurn", For: "3h", Expr: intstr.FromString(`up:burnrate6h{instance!~"(127.0.0.1|localhost).*",slo="up-targets"} > (1 * (1-0.99)) and up:burnrate4d{instance!~"(127.0.0.1|localhost).*",slo="up-targets"} > (1 * (1-0.99))`), - Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "up-targets", "short": "6h"}, + Labels: map[string]string{"severity": "warning", "long": "4d", "slo": "up-targets", "short": "6h", "exhaustion": "4w"}, }}, }, }} From 2c8e69422d7bae60d009898d6a9fe4c2700a0cc2 Mon Sep 17 00:00:00 2001 From: Matthias Loibl Date: Fri, 14 Jul 2023 13:24:50 +0200 Subject: [PATCH 3/3] slo: Extract Exhausts to method on Objective --- slo/rules.go | 32 ++++---------------------------- slo/slo.go | 4 ++++ 2 files changed, 8 insertions(+), 28 deletions(-) diff --git a/slo/rules.go b/slo/rules.go index 5845bd733..4dcb94e49 100644 --- a/slo/rules.go +++ b/slo/rules.go @@ -125,13 +125,7 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) { alertLabels["short"] = model.Duration(w.Short).String() alertLabels["long"] = model.Duration(w.Long).String() alertLabels["severity"] = string(w.Severity) - - // Calculate exhaustion and propagate it as alert label - parsedLongWindowDuration, _ := model.ParseDuration(o.Window.String()) - parsedWindowDuration := time.Duration(parsedLongWindowDuration) - exhaustionCalculation := int(parsedWindowDuration.Seconds()) / int(w.Factor) - parsedExhaustionCalculation, _ := model.ParseDuration(fmt.Sprintf("%ds", exhaustionCalculation)) - alertLabels["exhaustion"] = parsedExhaustionCalculation.String() + alertLabels["exhaustion"] = o.Exhausts(w.Factor).String() r := monitoringv1.Rule{ Alert: o.AlertName(), @@ -219,13 +213,7 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) { alertLabels["short"] = model.Duration(w.Short).String() alertLabels["long"] = model.Duration(w.Long).String() alertLabels["severity"] = string(w.Severity) - - // Calculate exhaustion and propagate it as alert label - parsedLongWindowDuration, _ := model.ParseDuration(o.Window.String()) - parsedWindowDuration := time.Duration(parsedLongWindowDuration) - exhaustionCalculation := int(parsedWindowDuration.Seconds()) / int(w.Factor) - parsedExhaustionCalculation, _ := model.ParseDuration(fmt.Sprintf("%ds", exhaustionCalculation)) - alertLabels["exhaustion"] = parsedExhaustionCalculation.String() + alertLabels["exhaustion"] = o.Exhausts(w.Factor).String() r := monitoringv1.Rule{ Alert: o.AlertName(), @@ -313,13 +301,7 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) { alertLabels["short"] = model.Duration(w.Short).String() alertLabels["long"] = model.Duration(w.Long).String() alertLabels["severity"] = string(w.Severity) - - // Calculate exhaustion and propagate it as alert label - parsedLongWindowDuration, _ := model.ParseDuration(o.Window.String()) - parsedWindowDuration := time.Duration(parsedLongWindowDuration) - exhaustionCalculation := int(parsedWindowDuration.Seconds()) / int(w.Factor) - parsedExhaustionCalculation, _ := model.ParseDuration(fmt.Sprintf("%ds", exhaustionCalculation)) - alertLabels["exhaustion"] = parsedExhaustionCalculation.String() + alertLabels["exhaustion"] = o.Exhausts(w.Factor).String() r := monitoringv1.Rule{ Alert: o.AlertName(), @@ -407,13 +389,7 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) { alertLabels["short"] = model.Duration(w.Short).String() alertLabels["long"] = model.Duration(w.Long).String() alertLabels["severity"] = string(w.Severity) - - // Calculate exhaustion and propagate it as alert label - parsedLongWindowDuration, _ := model.ParseDuration(o.Window.String()) - parsedWindowDuration := time.Duration(parsedLongWindowDuration) - exhaustionCalculation := int(parsedWindowDuration.Seconds()) / int(w.Factor) - parsedExhaustionCalculation, _ := model.ParseDuration(fmt.Sprintf("%ds", exhaustionCalculation)) - alertLabels["exhaustion"] = parsedExhaustionCalculation.String() + alertLabels["exhaustion"] = o.Exhausts(w.Factor).String() r := monitoringv1.Rule{ Alert: o.AlertName(), diff --git a/slo/slo.go b/slo/slo.go index 60e1fcca7..91fd51830 100644 --- a/slo/slo.go +++ b/slo/slo.go @@ -50,6 +50,10 @@ func (o Objective) HasWindows(short, long model.Duration) (Window, bool) { return Window{}, false } +func (o Objective) Exhausts(factor float64) model.Duration { + return model.Duration(time.Second * time.Duration(time.Duration(o.Window).Seconds()/factor)) +} + type IndicatorType int const (