Skip to content

Commit

Permalink
Add customLabelFilter
Browse files Browse the repository at this point in the history
  • Loading branch information
bjorns163 committed Dec 17, 2024
1 parent 06148db commit b615cd0
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 13 deletions.
1 change: 1 addition & 0 deletions operations/helm/charts/mimir-distributed/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Entries should include a reference to the Pull Request that introduced the chang

## main / unreleased

* [ENHANCEMENT] Add extra values for KEDA auto scaling. #10265
* [CHANGE] Update rollout-operator version to 0.20.0. #9995
* [CHANGE] Remove the `track_sizes` feature for Memcached pods since it is unused. #10032
* [FEATURE] Add support for GEM's federation-frontend. See the `federation_frontend` section in the values file. #9673
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
kind: Deployment
triggers:
- metadata:
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
{{- $cpu_request := dig "requests" "cpu" nil .Values.distributor.resources }}
threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.distributor.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }}
Expand All @@ -42,7 +42,7 @@ spec:
name: keda-triggger-auth
{{- end }}
- metadata:
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0))
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
{{- $mem_request := dig "requests" "memory" nil .Values.distributor.resources }}
threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.distributor.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ spec:
kind: Deployment
triggers:
- metadata:
query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m]))
query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m]))
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }}
{{- if .Values.kedaAutoscaling.customHeaders }}
Expand All @@ -45,7 +45,7 @@ spec:
name: keda-triggger-auth
{{- end }}
- metadata:
query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"}[1m]))
query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m]))
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }}
{{- if .Values.kedaAutoscaling.customHeaders }}
Expand All @@ -65,7 +65,7 @@ spec:
{{- $autoscaling := .Values.querier.kedaAutoscaling -}}
{{- if .Values.querier.kedaAutoscaling.predictiveScalingEnabled }}
- metadata:
query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[{{$autoscaling.predictiveScalingLookback}}] offset {{$autoscaling.predictiveScalingPeriod}}))
query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[{{$autoscaling.predictiveScalingLookback}}] offset {{$autoscaling.predictiveScalingPeriod}}))
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
threshold: {{ $autoscaling.querySchedulerInflightRequestsThreshold | quote }}
{{- if .Values.kedaAutoscaling.customHeaders }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
kind: Deployment
triggers:
- metadata:
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
{{- $cpu_request := dig "requests" "cpu" nil .Values.query_frontend.resources }}
threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.query_frontend.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }}
Expand All @@ -42,7 +42,7 @@ spec:
name: keda-triggger-auth
{{- end }}
- metadata:
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0))
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
{{- $mem_request := dig "requests" "memory" nil .Values.query_frontend.resources }}
threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.query_frontend.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
kind: Deployment
triggers:
- metadata:
query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="ruler-query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m]))
query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="ruler-query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m]))
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
threshold: {{ .Values.ruler_querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }}
{{- if .Values.kedaAutoscaling.customHeaders }}
Expand All @@ -43,7 +43,7 @@ spec:
name: keda-triggger-auth
{{- end }}
- metadata:
query: sum(rate(cortex_querier_request_duration_seconds_sum{container="ruler-querier",namespace="{{ .Release.Namespace }}"}[1m]))
query: sum(rate(cortex_querier_request_duration_seconds_sum{container="ruler-querier",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m]))
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
threshold: {{ .Values.ruler_querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }}
{{- if .Values.kedaAutoscaling.customHeaders }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
kind: Deployment
triggers:
- metadata:
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
{{- $cpu_request := dig "requests" "cpu" nil .Values.ruler_query_frontend.resources }}
threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.ruler_query_frontend.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }}
Expand All @@ -43,7 +43,7 @@ spec:
name: keda-triggger-auth
{{- end }}
- metadata:
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0))
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
{{- $mem_request := dig "requests" "memory" nil .Values.ruler_query_frontend.resources }}
threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.ruler_query_frontend.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
kind: Deployment
triggers:
- metadata:
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
{{- $cpu_request := dig "requests" "cpu" nil .Values.ruler.resources }}
threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.ruler.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }}
Expand All @@ -42,7 +42,7 @@ spec:
name: keda-triggger-auth
{{- end }}
- metadata:
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0))
serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }}
{{- $mem_request := dig "requests" "memory" nil .Values.ruler.resources }}
threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.ruler.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }}
Expand Down
4 changes: 4 additions & 0 deletions operations/helm/charts/mimir-distributed/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,10 @@ kedaAutoscaling:
ignoreNullValues: true
unsafeSsl: false

# If your metrics are stored in a datasource with multiple mimir instances extra labels to filter the data are required to get the right metric.
customLabelFilter: {}
# cluster: "my-cluster-name"

# --KEDA trigger authentication settings.
# ref: https://keda.sh/docs/2.16/scalers/pulsar/#authentication-parameters
authentication:
Expand Down

0 comments on commit b615cd0

Please sign in to comment.