Merge pull request #1041 from sepulworld/alertname_for_missing_data
Allow the SLOMetricAbsent alert name to be configurable
metalmatze authored Apr 1, 2024
2 parents 3f99310 + 3ad61c9 commit dfcb0b7
Showing 5 changed files with 107 additions and 20 deletions.
7 changes: 7 additions & 0 deletions kubernetes/api/v1alpha1/servicelevelobjective_types.go
@@ -130,6 +130,10 @@ type Alerting struct {
// +optional
// Name is used as the name of the alert generated by Pyrra. Defaults to "ErrorBudgetBurn".
Name string `json:"name,omitempty"`

// +optional
// AbsentName is used as the name of the absent alert generated by Pyrra. Defaults to "SLOMetricAbsent".
AbsentName string `json:"absentName,omitempty"`
}

type RatioIndicator struct {
@@ -368,6 +372,9 @@ func (in *ServiceLevelObjective) Internal() (slo.Objective, error) {
if in.Spec.Alerting.Name != "" {
alerting.Name = in.Spec.Alerting.Name
}
if in.Spec.Alerting.AbsentName != "" {
alerting.AbsentName = in.Spec.Alerting.AbsentName
}

if in.Spec.ServiceLevelIndicator.Ratio != nil && in.Spec.ServiceLevelIndicator.Latency != nil {
return slo.Objective{}, fmt.Errorf("cannot have ratio and latency indicators at the same time")
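
For context, a sketch of how the new field could be set on a manifest. Only the name and absentName keys come directly from the JSON tags above; the apiVersion/kind, the spec.alerting path, and the example values (borrowed from the test fixtures in this PR) are assumptions, not part of the change itself.

```yaml
apiVersion: pyrra.dev/v1alpha1   # assumed group/version for these v1alpha1 types
kind: ServiceLevelObjective
metadata:
  name: apiserver-write-response-errors
spec:
  # target, window and indicator omitted for brevity
  alerting:
    name: APIServerErrorBudgetBurn     # overrides the default "ErrorBudgetBurn"
    absentName: APIServerMetricAbsent  # new: overrides the default "SLOMetricAbsent"
```
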
6 changes: 4 additions & 2 deletions slo/promql_test.go
@@ -240,8 +240,10 @@ var (
Target: 0.99,
Window: model.Duration(14 * 24 * time.Hour),
Alerting: Alerting{
Burnrates: true,
Absent: true,
Name: "APIServerErrorBudgetBurn",
AbsentName: "APIServerMetricAbsent",
Burnrates: true,
Absent: true,
},
Indicator: Indicator{
Ratio: &RatioIndicator{
10 changes: 5 additions & 5 deletions slo/rules.go
@@ -713,7 +713,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) {
}.replace(expr)

rules = append(rules, monitoringv1.Rule{
Alert: "SLOMetricAbsent",
Alert: o.AlertNameAbsent(),
Expr: intstr.FromString(expr.String()),
For: monitoringDuration(model.Duration(
(time.Duration(o.Window) / (28 * 24 * (60 / 2))).Round(time.Minute),
@@ -755,7 +755,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) {
}.replace(expr)

rules = append(rules, monitoringv1.Rule{
Alert: "SLOMetricAbsent",
Alert: o.AlertNameAbsent(),
Expr: intstr.FromString(expr.String()),
For: monitoringDuration(model.Duration(
(time.Duration(o.Window) / (28 * 24 * (60 / 2))).Round(time.Minute),
@@ -867,7 +867,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) {
alertLabels["severity"] = string(critical)

rules = append(rules, monitoringv1.Rule{
Alert: "SLOMetricAbsent",
Alert: o.AlertNameAbsent(),
Expr: intstr.FromString(expr.String()),
For: monitoringDuration(model.Duration(
(time.Duration(o.Window) / (28 * 24 * (60 / 2))).Round(time.Minute),
@@ -894,7 +894,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) {
alertLabelsLe["severity"] = string(critical)

rules = append(rules, monitoringv1.Rule{
Alert: "SLOMetricAbsent",
Alert: o.AlertNameAbsent(),
Expr: intstr.FromString(expr.String()),
For: monitoringDuration(model.Duration(
(time.Duration(o.Window) / (28 * 24 * (60 / 2))).Round(time.Minute),
@@ -1037,7 +1037,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) {
alertLabels["severity"] = string(critical)

rules = append(rules, monitoringv1.Rule{
Alert: "SLOMetricAbsent",
Alert: o.AlertNameAbsent(),
Expr: intstr.FromString(expr.String()),
For: monitoringDuration(model.Duration(
(time.Duration(o.Window) / (28 * 24 * (60 / 2))).Round(time.Minute),
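
To illustrate the effect, the absent-metric rule for the objective used in the tests would render to Prometheus YAML roughly as follows. This is a hedged sketch: the alert name, expression, and labels are taken from the expected rule in rules_test.go below, while the for: duration is derived from the SLO window by the code above.

```yaml
- alert: APIServerMetricAbsent  # o.AlertNameAbsent(); previously hard-coded to "SLOMetricAbsent"
  expr: absent(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}) == 1
  for: 1m
  labels:
    job: apiserver
    slo: apiserver-write-response-errors
    severity: critical
```
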
86 changes: 77 additions & 9 deletions slo/rules_test.go
@@ -777,22 +777,22 @@ func TestObjective_Burnrates(t *testing.T) {
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Alert: "ErrorBudgetBurn",
Alert: "APIServerErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate3m{job="apiserver",slo="apiserver-write-response-errors"} > (14 * (1-0.99)) and apiserver_request:burnrate30m{job="apiserver",slo="apiserver-write-response-errors"} > (14 * (1-0.99))`),
For: monitoringDuration("1m0s"),
Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1d"},
}, {
Alert: "ErrorBudgetBurn",
Alert: "APIServerErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate15m{job="apiserver",slo="apiserver-write-response-errors"} > (7 * (1-0.99)) and apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors"} > (7 * (1-0.99))`),
For: monitoringDuration("8m0s"),
Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2d"},
}, {
Alert: "ErrorBudgetBurn",
Alert: "APIServerErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate1h{job="apiserver",slo="apiserver-write-response-errors"} > (2 * (1-0.99)) and apiserver_request:burnrate12h{job="apiserver",slo="apiserver-write-response-errors"} > (2 * (1-0.99))`),
For: monitoringDuration("30m0s"),
Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1w"},
}, {
Alert: "ErrorBudgetBurn",
Alert: "APIServerErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors"} > (1 * (1-0.99)) and apiserver_request:burnrate2d{job="apiserver",slo="apiserver-write-response-errors"} > (1 * (1-0.99))`),
For: monitoringDuration("1h30m0s"),
Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2w"},
@@ -833,22 +833,22 @@ func TestObjective_Burnrates(t *testing.T) {
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Alert: "ErrorBudgetBurn",
Alert: "APIServerErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate3m{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (14 * (1-0.99)) and apiserver_request:burnrate30m{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (14 * (1-0.99))`),
For: monitoringDuration("1m0s"),
Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1d"},
}, {
Alert: "ErrorBudgetBurn",
Alert: "APIServerErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate15m{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (7 * (1-0.99)) and apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (7 * (1-0.99))`),
For: monitoringDuration("8m0s"),
Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2d"},
}, {
Alert: "ErrorBudgetBurn",
Alert: "APIServerErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate1h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (2 * (1-0.99)) and apiserver_request:burnrate12h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (2 * (1-0.99))`),
For: monitoringDuration("30m0s"),
Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1w"},
}, {
Alert: "ErrorBudgetBurn",
Alert: "APIServerErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (1 * (1-0.99)) and apiserver_request:burnrate2d{job="apiserver",slo="apiserver-write-response-errors",verb=~"POST|PUT|PATCH|DELETE"} > (1 * (1-0.99))`),
For: monitoringDuration("1h30m0s"),
Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2w"},
@@ -1548,7 +1548,7 @@ func TestObjective_IncreaseRules(t *testing.T) {
Expr: intstr.FromString(`sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2w]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Alert: "SLOMetricAbsent",
Alert: "APIServerMetricAbsent",
Expr: intstr.FromString(`absent(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}) == 1`),
For: monitoringDuration("1m"),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors", "severity": "critical"},
@@ -1925,3 +1925,71 @@ func TestObjective_GrafanaRules(t *testing.T) {
})
}
}

func TestObjective_AlertName(t *testing.T) {
tests := []struct {
name string
objective Objective
want string
}{
{
name: "alert name present",
objective: Objective{
Alerting: Alerting{
Name: "test-alert",
},
},
want: "test-alert",
},
{
name: "alert name absent",
objective: Objective{
Alerting: Alerting{},
},
want: defaultAlertname,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
o := tt.objective
got := o.AlertName()
if got != tt.want {
t.Errorf("AlertName() = %v, want %v", got, tt.want)
}
})
}
}

func TestObjective_AlertNameMetricAbsent(t *testing.T) {
tests := []struct {
name string
objective Objective
want string
}{
{
name: "AlertNameAbsentDefault",
objective: Objective{
Alerting: Alerting{},
},
want: defaultAlertnameAbsent,
},
{
name: "AlertNameAbsentCustom",
objective: Objective{
Alerting: Alerting{
AbsentName: "foo",
},
},
want: "foo",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
o := tt.objective
got := o.AlertNameAbsent()
if got != tt.want {
t.Errorf("AlertNameMetricAbsent() = %v, want %v", got, tt.want)
}
})
}
}
18 changes: 14 additions & 4 deletions slo/slo.go
@@ -13,6 +13,7 @@ const (
// ObjectMeta to the PrometheusRule.
PropagationLabelsPrefix = "pyrra.dev/"
defaultAlertname = "ErrorBudgetBurn"
defaultAlertnameAbsent = "SLOMetricAbsent"
)

type Objective struct {
@@ -103,6 +104,14 @@ func (o Objective) AlertName() string {
return defaultAlertname
}

func (o Objective) AlertNameAbsent() string {
if o.Alerting.AbsentName != "" {
return o.Alerting.AbsentName
}

return defaultAlertnameAbsent
}

type Indicator struct {
Ratio *RatioIndicator
Latency *LatencyIndicator
@@ -134,10 +143,11 @@ type BoolGaugeIndicator struct {
}

type Alerting struct {
Disabled bool // deprecated, use Burnrates instead
Burnrates bool
Absent bool
Name string
Disabled bool // deprecated, use Burnrates instead
Burnrates bool
Absent bool
Name string
AbsentName string
}

type Metric struct {