Skip to content

Commit

Permalink
slo: fix latency burnrate recording rules with regexp matchers
Browse files Browse the repository at this point in the history
Same as #327
Ratio SLOs were the only ones creating these "grouping" labels from regexp matchers.

Fixes #1050
  • Loading branch information
Adrien Bestel authored and metalmatze committed Feb 20, 2024
1 parent 6bf8659 commit 70c5c40
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 13 deletions.
5 changes: 5 additions & 0 deletions slo/promql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,11 @@ var (
},
}
}
// objectiveAPIServerRatioGrouping returns the objectiveAPIServerRatio fixture
// with its ratio indicator explicitly grouped by the "verb" label.
objectiveAPIServerRatioGrouping = func() Objective {
o := objectiveAPIServerRatio()
// Explicit grouping: only the "verb" label is set on the ratio indicator.
o.Indicator.Ratio.Grouping = []string{"verb"}
return o
}
objectiveAPIServerRatioAlertingDisabled = func() Objective {
o := objectiveAPIServerRatio()
o.Alerting.Disabled = true
Expand Down
11 changes: 6 additions & 5 deletions slo/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ func (o Objective) Burnrates() (monitoringv1.RuleGroup, error) {
if m.Name == labels.MetricName {
continue
}
if _, ok := groupingMap[m.Name]; !ok {
if m.Type == labels.MatchRegexp || m.Type == labels.MatchNotRegexp {
continue
}
}

alertMatchers = append(alertMatchers, m.String())
}
alertMatchers = append(alertMatchers, fmt.Sprintf(`slo="%s"`, sloName))
Expand Down Expand Up @@ -452,11 +458,6 @@ func (o Objective) Burnrate(timerange time.Duration) string {
for _, s := range o.Indicator.Ratio.Grouping {
groupingMap[s] = struct{}{}
}
for _, m := range o.Indicator.Ratio.Total.LabelMatchers {
if m.Type == labels.MatchRegexp || m.Type == labels.MatchNotRegexp {
groupingMap[m.Name] = struct{}{}
}
}

grouping := make([]string, 0, len(groupingMap))
for s := range groupingMap {
Expand Down
72 changes: 64 additions & 8 deletions slo/rules_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,62 @@ func TestObjective_Burnrates(t *testing.T) {
}, {
name: "apiserver-write-response-errors",
slo: objectiveAPIServerRatio(),
rules: monitoringv1.RuleGroup{
Name: "apiserver-write-response-errors",
Interval: monitoringDuration("30s"),
Rules: []monitoringv1.Rule{{
Record: "apiserver_request:burnrate3m",
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate15m",
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate30m",
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate1h",
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate3h",
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate12h",
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate2d",
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Alert: "ErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate3m{job="apiserver",slo="apiserver-write-response-errors"} > (14 * (1-0.99)) and apiserver_request:burnrate30m{job="apiserver",slo="apiserver-write-response-errors"} > (14 * (1-0.99))`),
For: monitoringDuration("1m0s"),
Labels: map[string]string{"severity": "critical", "long": "30m", "short": "3m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1d"},
}, {
Alert: "ErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate15m{job="apiserver",slo="apiserver-write-response-errors"} > (7 * (1-0.99)) and apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors"} > (7 * (1-0.99))`),
For: monitoringDuration("8m0s"),
Labels: map[string]string{"severity": "critical", "long": "3h", "short": "15m", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2d"},
}, {
Alert: "ErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate1h{job="apiserver",slo="apiserver-write-response-errors"} > (2 * (1-0.99)) and apiserver_request:burnrate12h{job="apiserver",slo="apiserver-write-response-errors"} > (2 * (1-0.99))`),
For: monitoringDuration("30m0s"),
Labels: map[string]string{"severity": "warning", "long": "12h", "short": "1h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "1w"},
}, {
Alert: "ErrorBudgetBurn",
Expr: intstr.FromString(`apiserver_request:burnrate3h{job="apiserver",slo="apiserver-write-response-errors"} > (1 * (1-0.99)) and apiserver_request:burnrate2d{job="apiserver",slo="apiserver-write-response-errors"} > (1 * (1-0.99))`),
For: monitoringDuration("1h30m0s"),
Labels: map[string]string{"severity": "warning", "long": "2d", "short": "3h", "job": "apiserver", "slo": "apiserver-write-response-errors", "exhaustion": "2w"},
}},
},
}, {
name: "apiserver-write-response-errors-grouping",
slo: objectiveAPIServerRatioGrouping(),
rules: monitoringv1.RuleGroup{
Name: "apiserver-write-response-errors",
Interval: monitoringDuration("30s"),
Expand Down Expand Up @@ -806,31 +862,31 @@ func TestObjective_Burnrates(t *testing.T) {
Interval: monitoringDuration("30s"),
Rules: []monitoringv1.Rule{{
Record: "apiserver_request:burnrate3m",
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m]))`),
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3m]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate15m",
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m]))`),
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[15m]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate30m",
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))`),
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate1h",
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))`),
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate3h",
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h]))`),
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3h]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate12h",
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h]))`),
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[12h]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}, {
Record: "apiserver_request:burnrate2d",
Expr: intstr.FromString(`sum by (verb) (rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum by (verb) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`),
Expr: intstr.FromString(`sum(rate(apiserver_request_total{code=~"5..",job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d])) / sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2d]))`),
Labels: map[string]string{"job": "apiserver", "slo": "apiserver-write-response-errors"},
}},
},
Expand Down Expand Up @@ -1096,7 +1152,7 @@ func TestObjective_Burnrates(t *testing.T) {
},
}}

require.Len(t, testcases, 20)
require.Len(t, testcases, 21)

for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
Expand Down

0 comments on commit 70c5c40

Please sign in to comment.