diff --git a/kubernetes/api/v1alpha1/servicelevelobjective_types.go b/kubernetes/api/v1alpha1/servicelevelobjective_types.go index 18b9685b..62bf9794 100644 --- a/kubernetes/api/v1alpha1/servicelevelobjective_types.go +++ b/kubernetes/api/v1alpha1/servicelevelobjective_types.go @@ -130,6 +130,10 @@ type Alerting struct { // +optional // Name is used as the name of the alert generated by Pyrra. Defaults to "ErrorBudgetBurn". Name string `json:"name,omitempty"` + + // +optional + // AbsentName is used as the name of the absent alert generated by Pyrra. Defaults to "SLOMetricAbsent". + AbsentName string `json:"absentName,omitempty"` } type RatioIndicator struct { @@ -368,6 +372,9 @@ func (in *ServiceLevelObjective) Internal() (slo.Objective, error) { if in.Spec.Alerting.Name != "" { alerting.Name = in.Spec.Alerting.Name } + if in.Spec.Alerting.AbsentName != "" { + alerting.AbsentName = in.Spec.Alerting.AbsentName + } if in.Spec.ServiceLevelIndicator.Ratio != nil && in.Spec.ServiceLevelIndicator.Latency != nil { return slo.Objective{}, fmt.Errorf("cannot have ratio and latency indicators at the same time") diff --git a/slo/promql_test.go b/slo/promql_test.go index 5a1937ca..df680012 100644 --- a/slo/promql_test.go +++ b/slo/promql_test.go @@ -240,8 +240,10 @@ var ( Target: 0.99, Window: model.Duration(14 * 24 * time.Hour), Alerting: Alerting{ - Burnrates: true, - Absent: true, + Name: "APIServerErrorBudgetBurn", + AbsentName: "APIServerMetricAbsent", + Burnrates: true, + Absent: true, }, Indicator: Indicator{ Ratio: &RatioIndicator{ diff --git a/slo/rules.go b/slo/rules.go index bedf1289..65c60cb9 100644 --- a/slo/rules.go +++ b/slo/rules.go @@ -713,7 +713,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) { }.replace(expr) rules = append(rules, monitoringv1.Rule{ - Alert: "SLOMetricAbsent", + Alert: o.AlertNameAbsent(), Expr: intstr.FromString(expr.String()), For: monitoringDuration(model.Duration( (time.Duration(o.Window) / (28 * 24 * (60 / 
2))).Round(time.Minute), @@ -755,7 +755,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) { }.replace(expr) rules = append(rules, monitoringv1.Rule{ - Alert: "SLOMetricAbsent", + Alert: o.AlertNameAbsent(), Expr: intstr.FromString(expr.String()), For: monitoringDuration(model.Duration( (time.Duration(o.Window) / (28 * 24 * (60 / 2))).Round(time.Minute), @@ -867,7 +867,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) { alertLabels["severity"] = string(critical) rules = append(rules, monitoringv1.Rule{ - Alert: "SLOMetricAbsent", + Alert: o.AlertNameAbsent(), Expr: intstr.FromString(expr.String()), For: monitoringDuration(model.Duration( (time.Duration(o.Window) / (28 * 24 * (60 / 2))).Round(time.Minute), @@ -894,7 +894,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) { alertLabelsLe["severity"] = string(critical) rules = append(rules, monitoringv1.Rule{ - Alert: "SLOMetricAbsent", + Alert: o.AlertNameAbsent(), Expr: intstr.FromString(expr.String()), For: monitoringDuration(model.Duration( (time.Duration(o.Window) / (28 * 24 * (60 / 2))).Round(time.Minute), @@ -1037,7 +1037,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) { alertLabels["severity"] = string(critical) rules = append(rules, monitoringv1.Rule{ - Alert: "SLOMetricAbsent", + Alert: o.AlertNameAbsent(), Expr: intstr.FromString(expr.String()), For: monitoringDuration(model.Duration( (time.Duration(o.Window) / (28 * 24 * (60 / 2))).Round(time.Minute), diff --git a/slo/rules_test.go b/slo/rules_test.go index caca7170..9bb990d1 100644 --- a/slo/rules_test.go +++ b/slo/rules_test.go @@ -777,22 +777,22 @@ func TestObjective_Burnrates(t *testing.T) { Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`), Labels: map[string]string{"job": "apiserver", "slo": 
"apiserver-write-response-errors"}, }, { - Alert: "ErrorBudgetBurn", + Alert: "APIServerErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate3m{job="apiserver",slo="apiserver-write-response-errors"} > (14 * (1-0.99)) and apiserver_request:burnrate30m{job="apiserver",slo="apiserver-write-response-errors"} > (14 * (1-0.99))`), For: monitoringDuration("1m0s"), Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1d"}, }, { - Alert: "ErrorBudgetBurn", + Alert: "APIServerErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate15m{job="apiserver",slo="apiserver-write-response-errors"} > (7 * (1-0.99)) and apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors"} > (7 * (1-0.99))`), For: monitoringDuration("8m0s"), Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2d"}, }, { - Alert: "ErrorBudgetBurn", + Alert: "APIServerErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate1h{job="apiserver",slo="apiserver-write-response-errors"} > (2 * (1-0.99)) and apiserver_request:burnrate12h{job="apiserver",slo="apiserver-write-response-errors"} > (2 * (1-0.99))`), For: monitoringDuration("30m0s"), Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1w"}, }, { - Alert: "ErrorBudgetBurn", + Alert: "APIServerErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors"} > (1 * (1-0.99)) and apiserver_request:burnrate2d{job="apiserver",slo="apiserver-write-response-errors"} > (1 * (1-0.99))`), For: monitoringDuration("1h30m0s"), Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": 
"apiserver-write-response-errors", "exhaustion": "2w"}, @@ -833,22 +833,22 @@ func TestObjective_Burnrates(t *testing.T) { Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`), Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"}, }, { - Alert: "ErrorBudgetBurn", + Alert: "APIServerErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate3m{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (14 * (1-0.99)) and apiserver_request:burnrate30m{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (14 * (1-0.99))`), For: monitoringDuration("1m0s"), Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1d"}, }, { - Alert: "ErrorBudgetBurn", + Alert: "APIServerErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate15m{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (7 * (1-0.99)) and apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (7 * (1-0.99))`), For: monitoringDuration("8m0s"), Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2d"}, }, { - Alert: "ErrorBudgetBurn", + Alert: "APIServerErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate1h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (2 * (1-0.99)) and apiserver_request:burnrate12h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (2 * (1-0.99))`), For: monitoringDuration("30m0s"), Labels: 
map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1w"}, }, { - Alert: "ErrorBudgetBurn", + Alert: "APIServerErrorBudgetBurn", Expr: intstr.FromString(`apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (1 * (1-0.99)) and apiserver_request:burnrate2d{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (1 * (1-0.99))`), For: monitoringDuration("1h30m0s"), Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2w"}, @@ -1548,7 +1548,7 @@ func TestObjective_IncreaseRules(t *testing.T) { Expr: intstr.FromString(`sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2w]))`), Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"}, }, { - Alert: "SLOMetricAbsent", + Alert: "APIServerMetricAbsent", Expr: intstr.FromString(`absent(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}) == 1`), For: monitoringDuration("1m"), Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors", "severity": "critical"}, @@ -1925,3 +1925,71 @@ func TestObjective_GrafanaRules(t *testing.T) { }) } } + +func TestObjective_AlertName(t *testing.T) { + tests := []struct { + name string + objective Objective + want string + }{ + { + name: "alert name present", + objective: Objective{ + Alerting: Alerting{ + Name: "test-alert", + }, + }, + want: "test-alert", + }, + { + name: "alert name absent", + objective: Objective{ + Alerting: Alerting{}, + }, + want: defaultAlertname, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + o := tt.objective + got := o.AlertName() + if got != tt.want { + t.Errorf("AlertName() = %v, want %v", got, tt.want) + } + }) + } +} + 
+func TestObjective_AlertNameMetricAbsent(t *testing.T) { + tests := []struct { + name string + objective Objective + want string + }{ + { + name: "AlertNameAbsentDefault", + objective: Objective{ + Alerting: Alerting{}, + }, + want: defaultAlertnameAbsent, + }, + { + name: "AlertNameAbsentCustom", + objective: Objective{ + Alerting: Alerting{ + AbsentName: "foo", + }, + }, + want: "foo", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + o := tt.objective + got := o.AlertNameAbsent() + if got != tt.want { + t.Errorf("AlertNameAbsent() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/slo/slo.go b/slo/slo.go index fb432f46..6cc2a0eb 100644 --- a/slo/slo.go +++ b/slo/slo.go @@ -13,6 +13,7 @@ const ( // ObjectMeta to the PrometheusRule. PropagationLabelsPrefix = "pyrra.dev/" defaultAlertname = "ErrorBudgetBurn" + defaultAlertnameAbsent = "SLOMetricAbsent" ) type Objective struct { @@ -103,6 +104,14 @@ func (o Objective) AlertName() string { return defaultAlertname } +func (o Objective) AlertNameAbsent() string { + if o.Alerting.AbsentName != "" { + return o.Alerting.AbsentName + } + + return defaultAlertnameAbsent +} + type Indicator struct { Ratio *RatioIndicator Latency *LatencyIndicator @@ -134,10 +143,11 @@ type BoolGaugeIndicator struct { } type Alerting struct { - Disabled bool // deprecated, use Burnrates instead - Burnrates bool - Absent bool - Name string + Disabled bool // deprecated, use Burnrates instead + Burnrates bool + Absent bool + Name string + AbsentName string } type Metric struct {