From 6740b199421071e8bb85a7854540b7d7b486ec84 Mon Sep 17 00:00:00 2001 From: Bjorn Date: Tue, 17 Dec 2024 15:32:52 +0100 Subject: [PATCH] Add customLabelFilter --- .../helm/charts/mimir-distributed/CHANGELOG.md | 1 + .../templates/distributor/distributor-so.yaml | 4 ++-- .../templates/lib/CustomLabels.tpl | 17 +++++++++++++++++ .../templates/querier/querier-so.yaml | 6 +++--- .../query-frontend/query-frontend-so.yaml | 4 ++-- .../ruler-querier/ruler-querier-so.yaml | 4 ++-- .../ruler-query-frontend-so.yaml | 4 ++-- .../templates/ruler/ruler-so.yaml | 4 ++-- .../helm/charts/mimir-distributed/values.yaml | 4 ++++ 9 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl diff --git a/operations/helm/charts/mimir-distributed/CHANGELOG.md b/operations/helm/charts/mimir-distributed/CHANGELOG.md index 5b6cccba473..12458d1202e 100644 --- a/operations/helm/charts/mimir-distributed/CHANGELOG.md +++ b/operations/helm/charts/mimir-distributed/CHANGELOG.md @@ -28,6 +28,7 @@ Entries should be ordered as follows: Entries should include a reference to the Pull Request that introduced the change. ## main / unreleased +* [ENHANCEMENT] Add extra values for KEDA auto scaling. #10265 ## 5.6.0-rc.0 diff --git a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml index fe17393246b..7f3ee77c41b 100644 --- a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml @@ -24,7 +24,7 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.distributor.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.distributor.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} @@ -39,7 +39,7 @@ spec: name: {{ include "mimir.resourceName" (dict "ctx" .) }} {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.distributor.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.distributor.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} diff --git a/operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl b/operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl new file mode 100644 index 00000000000..ce53d3ef539 --- /dev/null +++ b/operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl @@ -0,0 +1,17 @@ +{{/* +Convert labels to string like: key1=value1, key2=value2, ... +Example: + customLabelFilter: + cluster: "my-cluster-name" +becomes: + cluster="my-cluster-name" +*/}} +{{- define "customLabelFilter" -}} + {{- if . }} + {{- $labels := "" }} + {{- range $key, $value := . }} + {{- $labels = printf "%s%s=\"%s\"," $labels $key $value }} + {{- end }} + {{- trimSuffix "," $labels -}} + {{- end }} +{{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml b/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml index ec41bdc3544..6da3d95971e 100644 --- a/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml @@ -27,7 +27,7 @@ spec: kind: Deployment triggers: - metadata: - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m])) + query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} @@ -42,7 +42,7 @@ spec: name: {{ include "mimir.resourceName" (dict "ctx" .) }} {{- end }} - metadata: - query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"}[1m])) + query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} @@ -59,7 +59,7 @@ spec: {{- $autoscaling := .Values.querier.kedaAutoscaling -}} {{- if .Values.querier.kedaAutoscaling.predictiveScalingEnabled }} - metadata: - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[{{$autoscaling.predictiveScalingLookback}}] offset {{$autoscaling.predictiveScalingPeriod}})) + query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[{{$autoscaling.predictiveScalingLookback}}] offset {{$autoscaling.predictiveScalingPeriod}})) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ $autoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} diff --git a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml index 8ff1e4a0094..9b93b4b1b35 100644 --- a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml @@ -24,7 +24,7 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.query_frontend.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.query_frontend.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} @@ -39,7 +39,7 @@ spec: name: {{ include "mimir.resourceName" (dict "ctx" .) }} {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.query_frontend.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.query_frontend.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml b/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml index f2d68eaabc6..204639648d1 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml @@ -25,7 +25,7 @@ spec: kind: Deployment triggers: - metadata: - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="ruler-query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m])) + query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="ruler-query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.ruler_querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} @@ -40,7 +40,7 @@ spec: name: {{ include "mimir.resourceName" (dict "ctx" .) }} {{- end }} - metadata: - query: sum(rate(cortex_querier_request_duration_seconds_sum{container="ruler-querier",namespace="{{ .Release.Namespace }}"}[1m])) + query: sum(rate(cortex_querier_request_duration_seconds_sum{container="ruler-querier",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.ruler_querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml b/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml index b196d6516d9..d2407745a3b 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml @@ -25,7 +25,7 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.ruler_query_frontend.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.ruler_query_frontend.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} @@ -40,7 +40,7 @@ spec: name: {{ include "mimir.resourceName" (dict "ctx" .) }} {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.ruler_query_frontend.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.ruler_query_frontend.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml b/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml index 2c1d82603a8..aea7bfa9aaf 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml @@ -24,7 +24,7 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.ruler.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.ruler.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} @@ -39,7 +39,7 @@ spec: name: {{ include "mimir.resourceName" (dict "ctx" .) }} {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.ruler.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.ruler.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} diff --git a/operations/helm/charts/mimir-distributed/values.yaml b/operations/helm/charts/mimir-distributed/values.yaml index e700ad84b4c..ceab25d7eac 100644 --- a/operations/helm/charts/mimir-distributed/values.yaml +++ b/operations/helm/charts/mimir-distributed/values.yaml @@ -545,6 +545,10 @@ kedaAutoscaling: ignoreNullValues: true unsafeSsl: false + # If your metrics are stored in a datasource with multiple mimir instances extra labels to filter the data are required to get the right metric. + customLabelFilter: {} + # cluster: "my-cluster-name" + # --KEDA trigger authentication settings. # ref: https://keda.sh/docs/2.16/scalers/pulsar/#authentication-parameters authentication: