diff --git a/operations/helm/charts/mimir-distributed/CHANGELOG.md b/operations/helm/charts/mimir-distributed/CHANGELOG.md index 5b6cccba473..12458d1202e 100644 --- a/operations/helm/charts/mimir-distributed/CHANGELOG.md +++ b/operations/helm/charts/mimir-distributed/CHANGELOG.md @@ -28,6 +28,7 @@ Entries should be ordered as follows: Entries should include a reference to the Pull Request that introduced the change. ## main / unreleased +* [ENHANCEMENT] Add extra values for KEDA auto scaling. #10265 ## 5.6.0-rc.0 diff --git a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml index 7555175daf1..7f3ee77c41b 100644 --- a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-so.yaml @@ -24,21 +24,33 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.distributor.resources }} threshold: 
{{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.distributor.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) }} + {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) 
(max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.distributor.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.distributor.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) 
}} + {{- end }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/keda/keda-trigger-auth.yaml b/operations/helm/charts/mimir-distributed/templates/keda/keda-trigger-auth.yaml new file mode 100644 index 00000000000..16e7477ecd1 --- /dev/null +++ b/operations/helm/charts/mimir-distributed/templates/keda/keda-trigger-auth.yaml @@ -0,0 +1,12 @@ +{{- if .Values.kedaAutoscaling.authentication.enabled }} +apiVersion: keda.sh/v1alpha1 +kind: TriggerAuthentication +metadata: + name: {{ include "mimir.resourceName" (dict "ctx" .) }} + namespace: {{ .Release.Namespace | quote }} +spec: + secretTargetRef: + {{- with .Values.kedaAutoscaling.authentication.secretTargetRef }} + {{- toYaml . | nindent 2 }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl b/operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl new file mode 100644 index 00000000000..ce53d3ef539 --- /dev/null +++ b/operations/helm/charts/mimir-distributed/templates/lib/CustomLabels.tpl @@ -0,0 +1,17 @@ +{{/* +Convert a map of labels to a PromQL label-matcher string like: key1="value1",key2="value2" +Example: + customLabelFilter: + cluster: "my-cluster-name" +becomes: + cluster="my-cluster-name" +*/}} +{{- define "customLabelFilter" -}} + {{- if . }} + {{- $labels := "" }} + {{- range $key, $value := . 
}} + {{- $labels = printf "%s%s=\"%s\"," $labels $key $value }} + {{- end }} + {{- trimSuffix "," $labels -}} + {{- end }} +{{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml b/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml index 02d2990101e..6da3d95971e 100644 --- a/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/querier/querier-so.yaml @@ -27,33 +27,51 @@ spec: kind: Deployment triggers: - metadata: - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m])) - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} name: cortex_querier_hpa_default type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) 
}} + {{- end }} - metadata: - query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"}[1m])) - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} name: cortex_querier_hpa_default_requests_duration type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) 
}} + {{- end }} {{- $autoscaling := .Values.querier.kedaAutoscaling -}} {{- if .Values.querier.kedaAutoscaling.predictiveScalingEnabled }} - metadata: - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[{{$autoscaling.predictiveScalingLookback}}] offset {{$autoscaling.predictiveScalingPeriod}})) + query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[{{$autoscaling.predictiveScalingLookback}}] offset {{$autoscaling.predictiveScalingPeriod}})) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ $autoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} name: cortex_querier_hpa_default_predictive type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) 
}} + {{- end }} {{- end }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml index 73426d35342..9b93b4b1b35 100644 --- a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-so.yaml @@ -24,21 +24,33 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.query_frontend.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.query_frontend.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ 
.Values.kedaAutoscaling.unsafeSsl | quote }} type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) }} + {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace }}"{{- if 
.Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.query_frontend.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.query_frontend.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) 
}} + {{- end }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml b/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml index 677dca4e12c..204639648d1 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-so.yaml @@ -25,22 +25,34 @@ spec: kind: Deployment triggers: - metadata: - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="ruler-query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m])) - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="ruler-query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.ruler_querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} name: cortex_querier_hpa_default type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) 
}} + {{- end }} - metadata: - query: sum(rate(cortex_querier_request_duration_seconds_sum{container="ruler-querier",namespace="{{ .Release.Namespace }}"}[1m])) - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: sum(rate(cortex_querier_request_duration_seconds_sum{container="ruler-querier",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[1m])) + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} threshold: {{ .Values.ruler_querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} name: cortex_querier_hpa_default_requests_duration type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) 
}} + {{- end }} {{- end }} {{- end }} \ No newline at end of file diff --git a/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml b/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml index d1a68e80463..d2407745a3b 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-so.yaml @@ -25,22 +25,34 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.ruler_query_frontend.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.ruler_query_frontend.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ 
.Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) }} + {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) 
(changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.ruler_query_frontend.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.ruler_query_frontend.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) 
}} + {{- end }} {{- end }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml b/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml index 670037e1a08..aea7bfa9aaf 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler/ruler-so.yaml @@ -24,21 +24,33 @@ spec: kind: Deployment triggers: - metadata: - query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000 - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0)[15m:]) * 1000 + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $cpu_request := dig "requests" "cpu" nil .Values.ruler.resources }} threshold: {{ mulf (include "mimir.cpuToMilliCPU" (dict "value" $cpu_request)) (divf .Values.ruler.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + 
authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) }} + {{- end }} - metadata: - query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0)) - serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} + query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}[15m]) > 0) and max by (pod) 
(kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"{{- if .Values.kedaAutoscaling.customLabelFilter }}, {{ include "customLabelFilter" .Values.kedaAutoscaling.customLabelFilter }}{{- end }}}) or vector(0)) + serverAddress: {{ include "mimir.kedaPrometheusAddress" (dict "ctx" $) }} {{- $mem_request := dig "requests" "memory" nil .Values.ruler.resources }} threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.ruler.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }} {{- if .Values.kedaAutoscaling.customHeaders }} customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.kedaAutoscaling.customHeaders)) | quote }} {{- end }} + ignoreNullValues: {{ .Values.kedaAutoscaling.ignoreNullValues | quote }} + unsafeSsl: {{ .Values.kedaAutoscaling.unsafeSsl | quote }} type: prometheus + {{- if .Values.kedaAutoscaling.authentication.enabled }} + authenticationRef: + name: {{ include "mimir.resourceName" (dict "ctx" .) }} + {{- end }} {{- end }} \ No newline at end of file diff --git a/operations/helm/charts/mimir-distributed/values.yaml b/operations/helm/charts/mimir-distributed/values.yaml index 85a186f32fb..ceab25d7eac 100644 --- a/operations/helm/charts/mimir-distributed/values.yaml +++ b/operations/helm/charts/mimir-distributed/values.yaml @@ -542,7 +542,25 @@ kedaAutoscaling: prometheusAddress: "" customHeaders: {} pollingInterval: 10 + ignoreNullValues: true + unsafeSsl: false + # If your metrics are stored in a datasource shared by multiple Mimir instances, extra label matchers are required to select the right series. + customLabelFilter: {} + # cluster: "my-cluster-name" + + # -- KEDA trigger authentication settings. 
+ # ref: https://keda.sh/docs/2.16/scalers/prometheus/#authentication-parameters + authentication: + enabled: false + authModes: "" + secretTargetRef: [] + # - key: username + # name: my-secret-name + # parameter: username + # - key: password + # name: my-secret-name + # parameter: password alertmanager: enabled: true # -- Total number of replicas for the alertmanager across all availability zones