From 2eb4f0bb2794f2ef27887cb74652cd4560407f10 Mon Sep 17 00:00:00 2001 From: Bjorn Date: Tue, 17 Dec 2024 15:32:52 +0100 Subject: [PATCH] Add customLabelFilter --- .../helm/charts/mimir-distributed/CHANGELOG.md | 1 + .../templates/distributor/distributor-so.yaml | 4 ++-- .../templates/lib/CustomLabels.tpl | 17 +++++++++++++++++ .../templates/querier/querier-so.yaml | 6 +++--- .../query-frontend/query-frontend-so.yaml | 4 ++-- .../ruler-querier/ruler-querier-so.yaml | 4 ++-- .../ruler-query-frontend-so.yaml | 4 ++-- .../templates/ruler/ruler-so.yaml | 4 ++-- .../helm/charts/mimir-distributed/values.yaml | 4 ++++ 9 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl diff --git a/operations/helm/charts/mimir-distributed/CHANGELOG.md b/operations/helm/charts/mimir-distributed/CHANGELOG.md index cff08db928c..26f7ac587af 100644 --- a/operations/helm/charts/mimir-distributed/CHANGELOG.md +++ b/operations/helm/charts/mimir-distributed/CHANGELOG.md @@ -29,6 +29,7 @@ Entries should include a reference to the Pull Request that introduced the chang ## main / unreleased +* [ENHANCEMENT] KEDA autoscaling: add `kedaAutoscaling.customLabelFilter` to append extra label matchers to the autoscaling queries. #10265 * [CHANGE] Update rollout-operator version to 0.20.0. #9995 * [CHANGE] Remove the `track_sizes` feature for Memcached pods since it is unused. #10032 * [FEATURE] Add support for GEM's federation-frontend. See the `federation_frontend` section in the values file. 
#9673 diff --git a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml index cfc82351dab..954af7c2471 100644 --- a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml @@ -24,7 +24,7 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.distributor.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.distributor.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} @@ -42,7 +42,7 @@ spec: name: keda-triggger-auth {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) 
(max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.distributor.resources 
}} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.distributor.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} diff --git a/operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl b/operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl new file mode 100644 index 00000000000..ce53d3ef539 --- /dev/null +++ b/operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl @@ -0,0 +1,17 @@ +{{/* +Render a map of labels as comma-separated PromQL label matchers: key1="value1",key2="value2",... +Each value is stringified and escaped with `quote`, so non-string values and embedded double quotes or backslashes still yield a valid matcher. +Matchers are emitted in sorted key order; an empty or nil map renders nothing. +Example: + customLabelFilter: + cluster: "my-cluster-name" +becomes: + cluster="my-cluster-name" +*/}} +{{- define "customLabelFilter" -}} + {{- $matchers := list }} + {{- range $key, $value := . }} + {{- $matchers = append $matchers (printf "%s=%s" $key ($value | toString | quote)) }} + {{- end }} + {{- join "," $matchers -}} +{{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml b/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml index 60c9e1092ba..105bfaf1146 100644 --- a/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml @@ -27,7 +27,7 @@ spec: kind: Deployment triggers: - metadata: - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m])) + query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if 
.Values.kedaAutoscaling.customHeaders }} @@ -45,7 +45,7 @@ spec: name: keda-triggger-auth {{- end }} - metadata: - query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"}[1m])) + query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} @@ -65,7 +65,7 @@ spec: {{- $autoscaling := .Values.querier.kedaAutoscaling -}} {{- if .Values.querier.kedaAutoscaling.predictiveScalingEnabled }} - metadata: - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[{{$autoscaling.predictiveScalingLookback}}] offset {{$autoscaling.predictiveScalingPeriod}})) + query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[{{$autoscaling.predictiveScalingLookback}}] offset {{$autoscaling.predictiveScalingPeriod}})) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ $autoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} diff --git a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml index 49ab3a5e098..40f28453c06 100644 --- 
a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml @@ -24,7 +24,7 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.query_frontend.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.query_frontend.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} @@ -42,7 +42,7 @@ spec: name: keda-triggger-auth {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace 
}}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.query_frontend.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.query_frontend.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} diff --git 
a/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml b/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml index 9ee51e99146..ab36207bb92 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml @@ -25,7 +25,7 @@ spec: kind: Deployment triggers: - metadata: - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="ruler-query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m])) + query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="ruler-query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.ruler_querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} @@ -43,7 +43,7 @@ spec: name: keda-triggger-auth {{- end }} - metadata: - query: sum(rate(cortex_querier_request_duration_seconds_sum{container="ruler-querier",namespace="{{ .Release.Namespace }}"}[1m])) + query: sum(rate(cortex_querier_request_duration_seconds_sum{container="ruler-querier",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.ruler_querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml 
b/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml index 648f60397a4..3b3c105315e 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml @@ -25,7 +25,7 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.ruler_query_frontend.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.ruler_query_frontend.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} @@ -43,7 +43,7 @@ spec: name: keda-triggger-auth {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) 
(max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- 
$mem_request := dig "requests" "memory" nil .Values.ruler_query_frontend.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.ruler_query_frontend.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml b/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml index 96f588e7dc9..f1e8fbc4faa 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml @@ -24,7 +24,7 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.ruler.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.ruler.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} @@ -42,7 +42,7 @@ spec: name: keda-triggger-auth {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"}) and max by (pod) 
(up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := 
dig "requests" "memory" nil .Values.ruler.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.ruler.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} diff --git a/operations/helm/charts/mimir-distributed/values.yaml b/operations/helm/charts/mimir-distributed/values.yaml index 07ebbb793c6..06608b244a6 100644 --- a/operations/helm/charts/mimir-distributed/values.yaml +++ b/operations/helm/charts/mimir-distributed/values.yaml @@ -545,6 +545,10 @@ kedaAutoscaling: ignoreNullValues: true unsafeSsl: false + # Extra label matchers appended to the KEDA autoscaling queries. Set this when the queried datasource holds metrics from multiple Mimir installations, so that only this installation's series are selected. + customLabelFilter: {} + # cluster: "my-cluster-name" + # --KEDA trigger authentication settings. # ref: https://keda.sh/docs/2.16/scalers/pulsar/#authentication-parameters authentication: