Skip to content

Commit

Permalink
slo: Fix total query for latency
Browse files Browse the repository at this point in the history
We summed up the total and errors instead of querying for only the total by matching `le=""`. This is fixed now and the error budget graph and card have the same values again.

Closes #697
  • Loading branch information
metalmatze committed Apr 21, 2023
1 parent ce97bfe commit c0ef3f0
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 11 deletions.
13 changes: 8 additions & 5 deletions slo/promql.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ func (o Objective) QueryTotal(window model.Duration) string {
grouping = slices.Clone(o.Indicator.Ratio.Grouping)
case Latency:
metric = increaseName(o.Indicator.Latency.Total.Name, window)
matchers = cloneMatchers(o.Indicator.Latency.Total.LabelMatchers)
matchers = append(
cloneMatchers(o.Indicator.Latency.Total.LabelMatchers),
&labels.Matcher{Type: labels.MatchEqual, Name: labels.BucketLabel, Value: ""},
)
grouping = slices.Clone(o.Indicator.Latency.Grouping)
case BoolGauge:
metric = countName(o.Indicator.BoolGauge.Name, window)
Expand Down Expand Up @@ -107,7 +110,7 @@ func (o Objective) QueryErrors(window model.Duration) string {
}
}
// Add the matcher {le=""} to select the recording rule that summed up all requests
matchers = append(matchers, &labels.Matcher{Type: labels.MatchEqual, Name: "le", Value: ""})
matchers = append(matchers, &labels.Matcher{Type: labels.MatchEqual, Name: labels.BucketLabel, Value: ""})
matchers = append(matchers, &labels.Matcher{
Type: labels.MatchEqual,
Name: "slo",
Expand Down Expand Up @@ -272,7 +275,7 @@ func (o Objective) QueryErrorBudget() string {
}
}
// Add the matcher {le=""} to select the recording rule that summed up all requests
matchers = append(matchers, &labels.Matcher{Type: labels.MatchEqual, Name: "le", Value: ""})
matchers = append(matchers, &labels.Matcher{Type: labels.MatchEqual, Name: labels.BucketLabel, Value: ""})
matchers = append(matchers, &labels.Matcher{
Type: labels.MatchEqual,
Name: "slo",
Expand Down Expand Up @@ -660,7 +663,7 @@ func (o Objective) DurationRange(timerange time.Duration, percentile float64) st
errorMetric: o.Indicator.Latency.Success.Name,
errorMatchers: matchers,
window: timerange,
grouping: []string{"le"},
grouping: []string{labels.BucketLabel},
percentile: percentile,
}.replace(expr)

Expand All @@ -681,7 +684,7 @@ func groupingLabels(errorMatchers, totalMatchers []*labels.Matcher) []string {

// This deletes the le label as grouping by it should usually not be wanted,
// and we have to remove it for the latency SLOs.
delete(groupingLabels, "le")
delete(groupingLabels, labels.BucketLabel)

return maps.Keys(groupingLabels)
}
Expand Down
12 changes: 6 additions & 6 deletions slo/promql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,23 +324,23 @@ func TestObjective_QueryTotal(t *testing.T) {
}, {
name: "http-latency",
objective: objectiveHTTPLatency(),
expected: `sum(http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"})`,
expected: `sum(http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",le="",slo="monitoring-http-latency"})`,
}, {
name: "http-latency-grouping",
objective: objectiveHTTPLatencyGrouping(),
expected: `sum by (job, handler) (http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"})`,
expected: `sum by (job, handler) (http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",le="",slo="monitoring-http-latency"})`,
}, {
name: "http-latency-grouping-regex",
objective: objectiveHTTPLatencyGroupingRegex(),
expected: `sum by (job, handler) (http_request_duration_seconds:increase4w{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"})`,
expected: `sum by (job, handler) (http_request_duration_seconds:increase4w{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",le="",slo="monitoring-http-latency"})`,
}, {
name: "grpc-latency",
objective: objectiveGRPCLatency(),
expected: `sum(grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"})`,
expected: `sum(grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="",slo="monitoring-grpc-latency"})`,
}, {
name: "grpc-latency-grouping",
objective: objectiveGRPCLatencyGrouping(),
expected: `sum by (job, handler) (grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"})`,
expected: `sum by (job, handler) (grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="",slo="monitoring-grpc-latency"})`,
}, {
name: "operator-ratio",
objective: objectiveOperator(),
Expand All @@ -356,7 +356,7 @@ func TestObjective_QueryTotal(t *testing.T) {
}, {
name: "apiserver-read-resource-latency",
objective: objectiveAPIServerLatency(),
expected: `sum by (resource, verb) (apiserver_request_duration_seconds:increase2w{job="apiserver",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"})`,
expected: `sum by (resource, verb) (apiserver_request_duration_seconds:increase2w{job="apiserver",le="",resource=~"resource|",slo="apiserver-read-resource-latency",verb=~"LIST|GET"})`,
}, {
name: "up-targets",
objective: objectiveUpTargets(),
Expand Down

0 comments on commit c0ef3f0

Please sign in to comment.