diff --git a/Makefile b/Makefile index b22c137f8..5a547d381 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ METRICS_CONFIG_FILES = $(subst values.yaml,metrics.alloy,$(INPUT_FILES)) EVENTS_CONFIG_FILES = $(subst values.yaml,events.alloy,$(INPUT_FILES)) LOGS_CONFIG_FILES = $(subst values.yaml,logs.alloy,$(INPUT_FILES)) PROFILES_CONFIG_FILES = $(subst values.yaml,profiles.alloy,$(INPUT_FILES)) +RULES_CONFIG_FILES = $(subst values.yaml,rules.alloy,$(INPUT_FILES)) CT_CONFIGFILE ?= .github/configs/ct.yaml LINT_CONFIGFILE ?= .github/configs/lintconf.yaml @@ -39,7 +40,7 @@ lint-chart: ct lint --debug --config "$(CT_CONFIGFILE)" --lint-conf "$(LINT_CONFIGFILE)" --check-version-increment=false lint-config lint-configs lint-alloy: - @./scripts/lint-alloy.sh $(METRICS_CONFIG_FILES) $(EVENTS_CONFIG_FILES) $(LOGS_CONFIG_FILES) --public-preview $(PROFILES_CONFIG_FILES) + @./scripts/lint-alloy.sh $(METRICS_CONFIG_FILES) $(EVENTS_CONFIG_FILES) $(LOGS_CONFIG_FILES) $(RULES_CONFIG_FILES) --public-preview $(PROFILES_CONFIG_FILES) # Shell Linting lint-sh lint-shell: @@ -98,7 +99,9 @@ test: scripts/test-runner.sh lint-chart lint-config %/profiles.alloy: %/output.yaml yq -r "select(.metadata.name==\"k8smon-alloy-profiles\") | .data[\"config.alloy\"] | select( . != null )" $< > $@ +%/rules.alloy: %/output.yaml + yq -r "select(.metadata.name==\"k8smon-alloy-rules\") | .data[\"config.alloy\"] | select( . != null )" $< > $@ -generate-example-outputs: $(OUTPUT_FILES) $(METRICS_CONFIG_FILES) $(EVENTS_CONFIG_FILES) $(LOGS_CONFIG_FILES) $(PROFILES_CONFIG_FILES) +generate-example-outputs: $(OUTPUT_FILES) $(METRICS_CONFIG_FILES) $(EVENTS_CONFIG_FILES) $(LOGS_CONFIG_FILES) $(PROFILES_CONFIG_FILES) $(RULES_CONFIG_FILES) regenerate-example-outputs: clean generate-example-outputs diff --git a/charts/k8s-monitoring/Chart.lock b/charts/k8s-monitoring/Chart.lock index f0a5da827..3e419964e 100644 --- a/charts/k8s-monitoring/Chart.lock +++ b/charts/k8s-monitoring/Chart.lock @@ -11,6 +11,9 @@ dependencies: - name: alloy repository: https://grafana.github.io/helm-charts version: 0.6.0 +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 0.3.2 - name: kube-state-metrics repository: https://prometheus-community.github.io/helm-charts version: 5.25.1 @@ -29,5 +32,5 @@ dependencies: - name: kepler repository: https://sustainable-computing-io.github.io/kepler-helm-chart version: 0.5.9 -digest: sha256:78cc014e2a726be60e168fa7d09facff16ff7ed399948403ff2e692ae8d24d91 -generated: "2024-08-14T17:25:14.684591-05:00" +digest: sha256:02e225df81ff2034d306bd6950a033fdcba0285c8ac4f7be31bbd13a03389e6b +generated: "2024-08-16T11:13:53.900883-05:00" diff --git a/charts/k8s-monitoring/Chart.yaml b/charts/k8s-monitoring/Chart.yaml index c6850e816..fdeecb25c 100644 --- a/charts/k8s-monitoring/Chart.yaml +++ b/charts/k8s-monitoring/Chart.yaml @@ -32,6 +32,11 @@ dependencies: version: 0.6.0 repository: https://grafana.github.io/helm-charts condition: profiles.enabled + - alias: alloy-rules + name: alloy + version: 0.3.2 + repository: https://grafana.github.io/helm-charts + condition: rules.enabled - name: kube-state-metrics version: 5.25.1 repository: https://prometheus-community.github.io/helm-charts diff --git a/charts/k8s-monitoring/README.md b/charts/k8s-monitoring/README.md index 82c4eef98..43e50b8b9 100644 --- a/charts/k8s-monitoring/README.md +++ b/charts/k8s-monitoring/README.md @@ -140,6 +140,7 @@ The Prometheus and Loki services may be hosted on the same cluster, or remotely | https://grafana.github.io/helm-charts | 
alloy-events(alloy) | 0.6.0 |
 | https://grafana.github.io/helm-charts | alloy-logs(alloy) | 0.6.0 |
 | https://grafana.github.io/helm-charts | alloy-profiles(alloy) | 0.6.0 |
+| https://grafana.github.io/helm-charts | alloy-rules(alloy) | 0.3.2 |
 | https://opencost.github.io/opencost-helm-chart | opencost | 1.41.0 |
 | https://prometheus-community.github.io/helm-charts | kube-state-metrics | 5.25.1 |
 | https://prometheus-community.github.io/helm-charts | prometheus-node-exporter | 4.38.0 |
@@ -841,6 +842,24 @@ The Prometheus and Loki services may be hosted on the same cluster, or remotely
 | receivers.zipkin.port | int | `9411` | Which port to use for the Zipkin receiver. This port needs to be opened in the alloy section below. |
 | receivers.zipkin.tls | object | `{}` | [TLS settings](https://grafana.com/docs/alloy/latest/reference/components/otelcol.receiver.zipkin/#tls-block) to configure for the Zipkin receiver. |
+
+### Rules
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| rules.enabled | bool | `false` | Whether or not to enable the rules synchronization |
+
+### Rules (Loki)
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| rules.loki.enabled | bool | `true` | Whether or not to enable the Loki rules synchronization |
+
+### Rules (Mimir)
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| rules.mimir.enabled | bool | `true` | Whether or not to enable the Mimir rules synchronization |
+
 ### Test Job
 
 | Key | Type | Default | Description |
@@ -872,6 +891,19 @@ The Prometheus and Loki services may be hosted on the same cluster, or remotely
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
 | extraObjects | list | `[]` | Deploy additional manifest objects |
+| rules.loki.namespace.label_expressions | list | `[]` | Label expressions for Namespace resources. |
+| rules.loki.namespace.label_selectors | object | `{}` | Label selector for Namespace resources. |
+| rules.loki.prefix | string | alloy | Prefix to be added to the rule namespace, used to differentiate between multiple Alloy deployments. |
+| rules.loki.rule.label_expressions | list | `[]` | Label expressions for PrometheusRule resources. Example: ```yaml - key: team operator: In values: ["ops"] ``` |
+| rules.loki.rule.label_selectors | object | `{"rule_type":"loki"}` | Label selectors for PrometheusRule resources as key/value pairs. Example: ```yaml label_selectors: rule_type: loki sync: "true" loki: "true" ``` |
+| rules.loki.sync_interval | string | 5m | Amount of time between reconciliations with Loki. |
+| rules.mimir.namespace.label_expressions | list | `[]` | Label expressions for Namespace resources. |
+| rules.mimir.namespace.label_selectors | object | `{}` | Label selector for Namespace resources. |
+| rules.mimir.prefix | string | alloy | Prefix to be added to the rule namespace, used to differentiate between multiple Alloy deployments. |
+| rules.mimir.prometheus_http_prefix | string | /api/prom | Path prefix for Mimir’s Prometheus endpoint (gem-path-prefix). |
+| rules.mimir.rule.label_expressions | list | `[]` | Label expressions for PrometheusRule resources. Example: ```yaml - key: team operator: In values: ["ops"] ``` |
+| rules.mimir.rule.label_selectors | object | `{"rule_type":"mimir"}` | Label selectors for PrometheusRule resources as key/value pairs.
Example: ```yaml label_selectors: rule_type: mimir sync: "true" mimir: "true" ``` |
+| rules.mimir.sync_interval | string | 5m | Amount of time between reconciliations with Mimir. |
 
 ## Customizing the configuration
 
diff --git a/charts/k8s-monitoring/charts/alloy-0.3.2.tgz b/charts/k8s-monitoring/charts/alloy-0.3.2.tgz
new file mode 100644
index 000000000..cddb68d19
Binary files /dev/null and b/charts/k8s-monitoring/charts/alloy-0.3.2.tgz differ
diff --git a/charts/k8s-monitoring/templates/_configs.tpl b/charts/k8s-monitoring/templates/_configs.tpl
index 2b9d1eec2..5621b497b 100644
--- a/charts/k8s-monitoring/templates/_configs.tpl
+++ b/charts/k8s-monitoring/templates/_configs.tpl
@@ -153,3 +153,11 @@
 {{- include "alloy.config.logging" (index .Values "alloy-profiles").logging }}
 {{- include "alloy.config.liveDebugging" (index .Values "alloy-profiles").liveDebugging}}
 {{- end -}}
+
+{{/* Grafana Alloy for Rules config */}}
+{{- define "alloyRulesConfig" -}}
+  {{- include "alloy.config.rulesMimir" . }}
+  {{- include "alloy.config.rulesLoki" . }}
+
+  {{- include "alloy.config.logging" (index .Values "alloy-rules").logging }}
+{{- end -}}
diff --git a/charts/k8s-monitoring/templates/alloy-rules-config.yaml b/charts/k8s-monitoring/templates/alloy-rules-config.yaml
new file mode 100644
index 000000000..0f6651103
--- /dev/null
+++ b/charts/k8s-monitoring/templates/alloy-rules-config.yaml
@@ -0,0 +1,10 @@
+{{- if .Values.rules.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "alloy.fullname" (index .Subcharts "alloy-rules") }}
+  namespace: {{ .Release.Namespace }}
+data:
+  config.alloy: |-
+    {{- include "alloyRulesConfig" . | trim | nindent 4 }}
+{{- end }}
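Taken together, the templates above only render when a user opts in via values. A minimal sketch of such an override file, assuming an otherwise default installation (the selector labels shown are the chart defaults; the cluster name is a hypothetical placeholder, not part of this change):

```yaml
# Hypothetical values override: enables the alloy-rules deployment and the
# ConfigMap rendered by alloy-rules-config.yaml above. Rule discovery is
# limited to PrometheusRule resources carrying the default rule_type labels.
cluster:
  name: my-cluster  # placeholder
rules:
  enabled: true
  mimir:
    enabled: true
    rule:
      label_selectors:
        rule_type: mimir
  loki:
    enabled: true
    rule:
      label_selectors:
        rule_type: loki
```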
diff --git a/charts/k8s-monitoring/templates/alloy_config/_rules_loki.alloy.txt b/charts/k8s-monitoring/templates/alloy_config/_rules_loki.alloy.txt
new file mode 100644
index 000000000..198c7e70d
--- /dev/null
+++ b/charts/k8s-monitoring/templates/alloy_config/_rules_loki.alloy.txt
@@ -0,0 +1,74 @@
+{{ define "alloy.config.rulesLoki" }}
+{{- if .Values.rules.loki.enabled }}
+// Logs Service
+remote.kubernetes.secret "logs_service" {
+  name = {{ include "kubernetes_monitoring.logs_service.secret.name" . | quote }}
+  namespace = {{ .Values.externalServices.loki.secret.namespace | default .Release.Namespace | quote }}
+}
+// Rules
+loki.rules.kubernetes "rules_service" {
+  address = nonsensitive(remote.kubernetes.secret.logs_service.data[{{ .Values.externalServices.loki.hostKey | quote }}])
+  {{- if or (and (eq .Values.externalServices.loki.secret.create true) (.Values.externalServices.loki.tenantId)) (eq .Values.externalServices.loki.secret.create false) }}
+  headers = nonsensitive(coalesce(remote.kubernetes.secret.logs_service.data[{{ .Values.externalServices.loki.tenantIdKey | quote }}], ""))
+  {{- end }}
+  sync_interval = {{ .Values.rules.loki.sync_interval | quote }}
+  loki_namespace_prefix = {{ .Values.rules.loki.prefix | quote }}
+{{- if .Values.externalServices.loki.proxyURL }}
+  proxy_url = {{ .Values.externalServices.loki.proxyURL | quote }}
+{{- end }}
+{{ if eq .Values.externalServices.loki.authMode "basic" }}
+  basic_auth {
+    username = nonsensitive(remote.kubernetes.secret.logs_service.data[{{ .Values.externalServices.loki.basicAuth.usernameKey | quote }}])
+    password = remote.kubernetes.secret.logs_service.data[{{ .Values.externalServices.loki.basicAuth.passwordKey | quote }}]
+  }
+{{- end }}
+  rule_namespace_selector {
+    {{- if .Values.rules.loki.namespace.label_selectors }}
+    match_labels = {
+      {{- range $key, $value := .Values.rules.loki.namespace.label_selectors }}
+      {{ $key }} = "{{ $value }}",
+      {{- end }}
+    }
+    {{- end }}
+
+    {{- if .Values.rules.loki.namespace.label_expressions }}
+    {{- range $expr := .Values.rules.loki.namespace.label_expressions }}
+    match_expression {
+      key = "{{ $expr.key }}"
+      operator = "{{ $expr.operator }}"
+      values = [
+        {{- range $index, $value := $expr.values }}
+        {{- if $index }}, {{ end }}"{{ $value }}"
+        {{- end }}
+      ]
+    }
+    {{- end }}
+    {{- end }}
+  }
+
+  rule_selector {
+    {{- if .Values.rules.loki.rule.label_selectors }}
+    match_labels = {
+      {{- range $key, $value := .Values.rules.loki.rule.label_selectors }}
+      {{ $key }} = "{{ $value }}",
+      {{- end }}
+    }
+    {{- end }}
+
+    {{- if .Values.rules.loki.rule.label_expressions }}
+    {{- range $expr := .Values.rules.loki.rule.label_expressions }}
+    match_expression {
+      key = "{{ $expr.key }}"
+      operator = "{{ $expr.operator }}"
+      values = [
+        {{- range $index, $value := $expr.values }}
+        {{- if $index }}, {{ end }}"{{ $value }}"
+        {{- end }}
+      ]
+    }
+    {{- end }}
+    {{- end }}
+  }
+}
+{{- end }}
+{{- end }}
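For reference, a hypothetical PrometheusRule that the default `rule_selector` rendered above would match. `loki.rules.kubernetes` loads rule definitions out of PrometheusRule resources, with the expressions written in LogQL rather than PromQL; the name and namespace here are illustrative only:

```yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: example-loki-rules   # hypothetical
  namespace: default
  labels:
    rule_type: loki          # matches the default rules.loki.rule.label_selectors
spec:
  groups:
    - name: log-alerts
      rules:
        - alert: HighErrorLogRate
          # LogQL: fires when "error" lines exceed 10/s over 5 minutes
          expr: sum(rate({namespace="default"} |= "error" [5m])) > 10
          for: 5m
```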
diff --git a/charts/k8s-monitoring/templates/alloy_config/_rules_mimir.alloy.txt b/charts/k8s-monitoring/templates/alloy_config/_rules_mimir.alloy.txt
new file mode 100644
index 000000000..d5d5982c4
--- /dev/null
+++ b/charts/k8s-monitoring/templates/alloy_config/_rules_mimir.alloy.txt
@@ -0,0 +1,75 @@
+{{ define "alloy.config.rulesMimir" }}
+{{- if .Values.rules.mimir.enabled }}
+// Metrics Service
+remote.kubernetes.secret "metrics_service" {
+  name = {{ include "kubernetes_monitoring.metrics_service.secret.name" . | quote }}
+  namespace = {{ .Values.externalServices.prometheus.secret.namespace | default .Release.Namespace | quote }}
+}
+// Rules
+mimir.rules.kubernetes "rules_service" {
+  address = nonsensitive(remote.kubernetes.secret.metrics_service.data[{{ .Values.externalServices.prometheus.hostKey | quote }}])
+  {{- if or (and (eq .Values.externalServices.prometheus.secret.create true) (.Values.externalServices.prometheus.tenantId)) (eq .Values.externalServices.prometheus.secret.create false) }}
+  headers = nonsensitive(coalesce(remote.kubernetes.secret.metrics_service.data[{{ .Values.externalServices.prometheus.tenantIdKey | quote }}], ""))
+  {{- end }}
+  sync_interval = {{ .Values.rules.mimir.sync_interval | quote }}
+  prometheus_http_prefix = {{ .Values.rules.mimir.prometheus_http_prefix | quote }}
+  mimir_namespace_prefix = {{ .Values.rules.mimir.prefix | quote }}
+{{- if .Values.externalServices.prometheus.proxyURL }}
+  proxy_url = {{ .Values.externalServices.prometheus.proxyURL | quote }}
+{{- end }}
+{{ if eq .Values.externalServices.prometheus.authMode "basic" }}
+  basic_auth {
+    username = nonsensitive(remote.kubernetes.secret.metrics_service.data[{{ .Values.externalServices.prometheus.basicAuth.usernameKey | quote }}])
+    password = remote.kubernetes.secret.metrics_service.data[{{ .Values.externalServices.prometheus.basicAuth.passwordKey | quote }}]
+  }
+{{- end }}
+  rule_namespace_selector {
+    {{- if .Values.rules.mimir.namespace.label_selectors }}
+    match_labels = {
+      {{- range $key, $value := .Values.rules.mimir.namespace.label_selectors }}
+      {{ $key }} = "{{ $value }}",
+      {{- end }}
+    }
+    {{- end }}
+
+    {{- if .Values.rules.mimir.namespace.label_expressions }}
+    {{- range $expr := .Values.rules.mimir.namespace.label_expressions }}
+    match_expression {
+      key = "{{ $expr.key }}"
+      operator = "{{ $expr.operator }}"
+      values = [
+        {{- range $index, $value := $expr.values }}
+        {{- if $index }}, {{ end }}"{{ $value }}"
+        {{- end }}
+      ]
+    }
+    {{- end }}
+    {{- end }}
+  }
+
+  rule_selector {
+    {{- if .Values.rules.mimir.rule.label_selectors }}
+    match_labels = {
+      {{- range $key, $value := .Values.rules.mimir.rule.label_selectors }}
+      {{ $key }} = "{{ $value }}",
+      {{- end }}
+    }
+    {{- end }}
+
+    {{- if .Values.rules.mimir.rule.label_expressions }}
+    {{- range $expr := .Values.rules.mimir.rule.label_expressions }}
+    match_expression {
+      key = "{{ $expr.key }}"
+      operator = "{{ $expr.operator }}"
+      values = [
+        {{- range $index, $value := $expr.values }}
+        {{- if $index }}, {{ end }}"{{ $value }}"
+        {{- end }}
+      ]
+    }
+    {{- end }}
+    {{- end }}
+  }
+}
+{{- end }}
+{{- end }}
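The Mimir side works the same way but with ordinary PromQL rule bodies. A hypothetical counterpart labeled for the default `rules.mimir.rule.label_selectors`; with the default `prefix` of `alloy`, the rule namespace created in Mimir ends up prefixed accordingly:

```yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: example-mimir-rules  # hypothetical
  namespace: default
  labels:
    rule_type: mimir         # matches the default rules.mimir.rule.label_selectors
spec:
  groups:
    - name: pod-alerts
      rules:
        - alert: PodRestartingTooOften
          # PromQL: more than 3 container restarts within 15 minutes
          expr: increase(kube_pod_container_status_restarts_total[15m]) > 3
          for: 10m
```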
diff --git a/charts/k8s-monitoring/values.schema.json b/charts/k8s-monitoring/values.schema.json
index 5bf71489e..84a23e782 100644
--- a/charts/k8s-monitoring/values.schema.json
+++ b/charts/k8s-monitoring/values.schema.json
@@ -138,6 +138,81 @@
         }
       }
     },
+    "alloy-rules": {
+      "type": "object",
+      "properties": {
+        "alloy": {
+          "type": "object",
+          "properties": {
+            "clustering": {
+              "type": "object",
+              "properties": {
+                "enabled": {
+                  "type": "boolean"
+                }
+              }
+            },
+            "configMap": {
+              "type": "object",
+              "properties": {
+                "create": {
+                  "type": "boolean"
+                }
+              }
+            }
+          }
+        },
+        "controller": {
+          "type": "object",
+          "properties": {
+            "nodeSelector": {
+              "type": "object",
+              "properties": {
+                "kubernetes.io/os": {
+                  "type": "string"
+                }
+              }
+            },
+            "tolerations": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "effect": {
+                    "type": "string"
+                  },
+                  "operator": {
+                    "type": "string"
+                  }
+                }
+              }
+            },
+            "type": {
+              "type": "string"
+            }
+          }
+        },
+        "crds": {
+          "type": "object",
+          "properties": {
+            "create": {
+              "type": "boolean"
+            }
+          }
+        },
+        "logging": {
+          "type": "object",
+          "properties": {
+            "format": {
+              "type": "string"
+            },
+            "level": {
+              "type": "string"
+            }
+          }
+        }
+      }
+    },
     "cluster": {
       "type": "object",
       "properties": {
@@ -2283,6 +2358,99 @@
         }
       }
     },
+    "rules": {
+      "type": "object",
+      "properties": {
+        "enabled": {
+          "type": "boolean"
+        },
+        "loki": {
+          "type": "object",
+          "properties": {
+            "enabled": {
+              "type": "boolean"
+            },
+            "namespace": {
+              "type": "object",
+              "properties": {
+                "label_expressions": {
+                  "type": "array"
+                },
+                "label_selectors": {
+                  "type": "object"
+                }
+              }
+            },
+            "prefix": {
+              "type": "string"
+            },
+            "rule": {
+              "type": "object",
+              "properties": {
+                "label_expressions": {
+                  "type": "array"
+                },
+                "label_selectors": {
+                  "type": "object",
+                  "properties": {
+                    "rule_type": {
+                      "type": "string"
+                    }
+                  }
+                }
+              }
+            },
+            "sync_interval": {
+              "type": "string"
+            }
+          }
+        },
+        "mimir": {
+          "type": "object",
+          "properties": {
+            "enabled": {
+              "type": "boolean"
+            },
+            "namespace": {
+              "type": "object",
+              "properties": {
+                "label_expressions": {
+                  "type": "array"
+                },
+                "label_selectors": {
+                  "type": "object"
+                }
+              }
+            },
+            "prefix": {
+              "type": "string"
+            },
+            "prometheus_http_prefix": {
+              "type": "string"
+            },
+            "rule": {
+              "type": "object",
+              "properties": {
+                "label_expressions": {
+                  "type": "array"
+                },
+                "label_selectors": {
+                  "type": "object",
+                  "properties": {
+                    "rule_type": {
+                      "type": "string"
+                    }
+                  }
+                }
+              }
+            },
+            "sync_interval": {
+              "type": "string"
+            }
+          }
+        }
+      }
+    },
     "test": {
       "type": "object",
       "properties": {
diff --git a/charts/k8s-monitoring/values.yaml b/charts/k8s-monitoring/values.yaml
index 8bf6cc696..ee1adfa38 100644
--- a/charts/k8s-monitoring/values.yaml
+++ b/charts/k8s-monitoring/values.yaml
@@ -1709,6 +1709,91 @@ profiles:
     - mutex
     - fgprof
 
+# Settings related to the Rule syncing
+rules:
+  # -- Whether or not to enable the rules synchronization
+  # @section -- Rules
+  enabled: false
+
+  # Settings for Mimir rules, see: https://grafana.com/docs/alloy/latest/reference/components/mimir.rules.kubernetes/
+  mimir:
+    # -- Whether or not to enable the Mimir rules synchronization
+    # @section -- Rules (Mimir)
+    enabled: true
+    # -- Amount of time between reconciliations with Mimir.
+    # @default -- 5m
+    sync_interval: 5m
+    # -- Path prefix for Mimir’s Prometheus endpoint (gem-path-prefix).
+    # @default -- /api/prom
+    prometheus_http_prefix: /api/prom
+    # -- Prefix to be added to the rule namespace, used to differentiate between multiple Alloy deployments.
+    # @default -- alloy
+    prefix: alloy
+    # Namespace selectors for Mimir rules.
+    namespace:
+      # -- Label selector for Namespace resources.
+      label_selectors: {}
+      # -- Label expressions for Namespace resources.
+      label_expressions: []
+    # Selector for PrometheusRule resources.
+    rule:
+      # -- Label selectors for PrometheusRule resources as key/value pairs.
+      # Example:
+      # ```yaml
+      # label_selectors:
+      #   rule_type: mimir
+      #   sync: "true"
+      #   mimir: "true"
+      # ```
+      label_selectors:
+        rule_type: mimir
+      # -- Label expressions for PrometheusRule resources.
+      # Example:
+      # ```yaml
+      # - key: team
+      #   operator: In
+      #   values: ["ops"]
+      # ```
+      label_expressions: []
+
+  # Settings for Loki rules, see: https://grafana.com/docs/alloy/latest/reference/components/loki.rules.kubernetes/
+  loki:
+    # -- Whether or not to enable the Loki rules synchronization
+    # @section -- Rules (Loki)
+    enabled: true
+    # -- Amount of time between reconciliations with Loki.
+    # @default -- 5m
+    sync_interval: 5m
+    # -- Prefix to be added to the rule namespace, used to differentiate between multiple Alloy deployments.
+    # @default -- alloy
+    prefix: alloy
+    # Namespace selectors for Loki rules.
+    namespace:
+      # -- Label selector for Namespace resources.
+      label_selectors: {}
+      # -- Label expressions for Namespace resources.
+      label_expressions: []
+    # Selector for PrometheusRule resources.
+    rule:
+      # -- Label selectors for PrometheusRule resources as key/value pairs.
+      # Example:
+      # ```yaml
+      # label_selectors:
+      #   rule_type: loki
+      #   sync: "true"
+      #   loki: "true"
+      # ```
+      label_selectors:
+        rule_type: loki
+      # -- Label expressions for PrometheusRule resources.
+      # Example:
+      # ```yaml
+      # - key: team
+      #   operator: In
+      #   values: ["ops"]
+      # ```
+      label_expressions: []
+
 # Telemetry data receiver settings
 receivers:
   grpc:
@@ -2404,6 +2489,43 @@ alloy-profiles:
   # @ignored
   crds: {create: false}
 
+
+# Settings for the Grafana Alloy instance that syncs PrometheusRules to Mimir and Loki
+# See https://github.com/grafana/alloy/tree/main/operations/helm/charts/alloy for available values.
+# @ignored -- This skips including these values in README.md
+alloy-rules:
+  logging:
+    # -- Level at which Alloy log lines should be written.
+    # @section -- Chart
+    level: info
+    # -- Format to use for writing Alloy log lines.
+    # @section -- Chart
+    format: logfmt
+
+  alloy:
+    # This chart is creating the configuration, so the alloy chart does not need to.
+    # @ignored
+    configMap: {create: false}
+
+    # Disabling clustering by default, because rules synchronization does not require clustering.
+ # @ignored + clustering: {enabled: false} + + controller: + # @ignored + type: statefulset + # @ignored + nodeSelector: + kubernetes.io/os: linux + + tolerations: + - effect: NoSchedule + operator: Exists + + # Skip installation of the Grafana Alloy CRDs, since we don't use them in this chart + # @ignored + crds: {create: false} + # -- Deploy additional manifest objects extraObjects: [] # - apiVersion: external-secrets.io/v1beta1 diff --git a/examples/alloy-autoscaling-and-storage/rules.alloy b/examples/alloy-autoscaling-and-storage/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/control-plane-metrics/rules.alloy b/examples/control-plane-metrics/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/custom-config/rules.alloy b/examples/custom-config/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/custom-metrics-tuning/rules.alloy b/examples/custom-metrics-tuning/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/custom-pricing/rules.alloy b/examples/custom-pricing/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/custom-prometheus-operator-rules/rules.alloy b/examples/custom-prometheus-operator-rules/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/default-values/rules.alloy b/examples/default-values/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/eks-fargate/rules.alloy b/examples/eks-fargate/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/extra-rules/rules.alloy b/examples/extra-rules/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/gke-autopilot/rules.alloy b/examples/gke-autopilot/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/ibm-cloud/rules.alloy b/examples/ibm-cloud/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/logs-journal/rules.alloy b/examples/logs-journal/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/logs-only/rules.alloy b/examples/logs-only/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/metric-module-imports-extra-config/rules.alloy b/examples/metric-module-imports-extra-config/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/metric-module-imports/rules.alloy b/examples/metric-module-imports/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/metrics-only/rules.alloy b/examples/metrics-only/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/openshift-compatible/rules.alloy b/examples/openshift-compatible/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/otel-metrics-service/rules.alloy b/examples/otel-metrics-service/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/pod-labels/rules.alloy b/examples/pod-labels/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/private-image-registry/rules.alloy b/examples/private-image-registry/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/profiles-enabled/rules.alloy b/examples/profiles-enabled/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git a/examples/proxies/rules.alloy b/examples/proxies/rules.alloy new file mode 100644 index 000000000..e69de29bb diff --git 
a/examples/rules-sync/events.alloy b/examples/rules-sync/events.alloy new file mode 100644 index 000000000..266de8610 --- /dev/null +++ b/examples/rules-sync/events.alloy @@ -0,0 +1,48 @@ +// Cluster Events +loki.source.kubernetes_events "cluster_events" { + job_name = "integrations/kubernetes/eventhandler" + log_format = "logfmt" + forward_to = [ + loki.process.cluster_events.receiver, + ] +} + +loki.process "cluster_events" { + forward_to = [ + loki.process.logs_service.receiver, + ] +} + +// Logs Service +remote.kubernetes.secret "logs_service" { + name = "loki-k8s-monitoring" + namespace = "default" +} + +loki.process "logs_service" { + stage.static_labels { + values = { + cluster = "rules-sync", + } + } + forward_to = [loki.write.logs_service.receiver] +} + +// Loki +loki.write "logs_service" { + endpoint { + url = nonsensitive(remote.kubernetes.secret.logs_service.data["host"]) + "/loki/api/v1/push" + tenant_id = nonsensitive(remote.kubernetes.secret.logs_service.data["tenantId"]) + + basic_auth { + username = nonsensitive(remote.kubernetes.secret.logs_service.data["username"]) + password = remote.kubernetes.secret.logs_service.data["password"] + } + } +} + + +logging { + level = "info" + format = "logfmt" +} diff --git a/examples/rules-sync/logs.alloy b/examples/rules-sync/logs.alloy new file mode 100644 index 000000000..a7ea0e979 --- /dev/null +++ b/examples/rules-sync/logs.alloy @@ -0,0 +1,155 @@ +// Pod Logs +discovery.kubernetes "pods" { + role = "pod" + selectors { + role = "pod" + field = "spec.nodeName=" + env("HOSTNAME") + } +} + +discovery.relabel "pod_logs" { + targets = discovery.kubernetes.pods.targets + rule { + source_labels = ["__meta_kubernetes_namespace"] + action = "replace" + target_label = "namespace" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + action = "replace" + target_label = "pod" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + action = "replace" + target_label = "container" + } + rule { + source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"] + separator = "/" + action = "replace" + replacement = "$1" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + action = "replace" + replacement = "/var/log/pods/*$1/*.log" + target_label = "__path__" + } + + // set the container runtime as a label + rule { + action = "replace" + source_labels = ["__meta_kubernetes_pod_container_id"] + regex = "^(\\S+):\\/\\/.+$" + replacement = "$1" + target_label = "tmp_container_runtime" + } +} + +discovery.relabel "filtered_pod_logs" { + targets = discovery.relabel.pod_logs.output + rule { // Drop anything with a "falsy" annotation value + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_autogather"] + regex = "(false|no|skip)" + action = "drop" + } +} + +local.file_match "pod_logs" { + path_targets = discovery.relabel.filtered_pod_logs.output +} + +loki.source.file "pod_logs" { + targets = local.file_match.pod_logs.targets + forward_to = [loki.process.pod_logs.receiver] +} + +loki.process "pod_logs" { + stage.match { + selector = "{tmp_container_runtime=\"containerd\"}" + // the cri processing stage extracts the following k/v pairs: log, stream, time, flags + stage.cri {} + + // Set the extract flags and stream values as labels + stage.labels { + values = { + flags = "", + stream = "", + } + } + } + + stage.match { + selector = "{tmp_container_runtime=\"cri-o\"}" + // the cri processing 
stage extracts the following k/v pairs: log, stream, time, flags + stage.cri {} + + // Set the extract flags and stream values as labels + stage.labels { + values = { + flags = "", + stream = "", + } + } + } + + // if the label tmp_container_runtime from above is docker parse using docker + stage.match { + selector = "{tmp_container_runtime=\"docker\"}" + // the docker processing stage extracts the following k/v pairs: log, stream, time + stage.docker {} + + // Set the extract stream value as a label + stage.labels { + values = { + stream = "", + } + } + } + + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have + // cluster, namespace, pod, and container labels. + // Also drop the temporary container runtime label as it is no longer needed. + stage.label_drop { + values = ["filename", "tmp_container_runtime"] + } + forward_to = [loki.process.logs_service.receiver] +} + +// Logs Service +remote.kubernetes.secret "logs_service" { + name = "loki-k8s-monitoring" + namespace = "default" +} + +loki.process "logs_service" { + stage.static_labels { + values = { + cluster = "rules-sync", + } + } + forward_to = [loki.write.logs_service.receiver] +} + +// Loki +loki.write "logs_service" { + endpoint { + url = nonsensitive(remote.kubernetes.secret.logs_service.data["host"]) + "/loki/api/v1/push" + tenant_id = nonsensitive(remote.kubernetes.secret.logs_service.data["tenantId"]) + + basic_auth { + username = nonsensitive(remote.kubernetes.secret.logs_service.data["username"]) + password = remote.kubernetes.secret.logs_service.data["password"] + } + } +} + + +logging { + level = "info" + format = "logfmt" +} diff --git a/examples/rules-sync/metrics.alloy b/examples/rules-sync/metrics.alloy new file mode 100644 index 000000000..b9b41e0f3 --- /dev/null +++ b/examples/rules-sync/metrics.alloy @@ -0,0 +1,824 @@ +discovery.kubernetes "nodes" { + role = "node" +} + +discovery.kubernetes "services" { + role = "service" +} + +discovery.kubernetes "endpoints" { + role = "endpoints" +} + +discovery.kubernetes "pods" { + role = "pod" +} + +// OTLP Receivers +otelcol.receiver.otlp "receiver" { + debug_metrics { + disable_high_cardinality_metrics = true + } + + grpc { + endpoint = "0.0.0.0:4317" + } + + http { + endpoint = "0.0.0.0:4318" + } + output { + metrics = [otelcol.processor.resourcedetection.default.input] + logs = [otelcol.processor.resourcedetection.default.input] + } +} + + + + +// Processors +otelcol.processor.transform "add_metric_datapoint_attributes" { + // Grafana Cloud Kubernetes monitoring expects Loki labels `cluster`, `pod`, and `namespace` + error_mode = "ignore" + metric_statements { + context = "datapoint" + statements = [ + "set(attributes[\"deployment.environment\"], resource.attributes[\"deployment.environment\"])", + "set(attributes[\"service.version\"], resource.attributes[\"service.version\"])", + ] + } + output { + metrics = [otelcol.processor.k8sattributes.default.input] + } +} + +otelcol.processor.resourcedetection "default" { + detectors = ["env", "system"] + + system { + hostname_sources = ["os"] + } + + output { + metrics = [otelcol.processor.transform.add_metric_datapoint_attributes.input] + logs = [otelcol.processor.k8sattributes.default.input] + } +} + +otelcol.processor.k8sattributes "default" { + extract { + metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] + } + 
pod_association { + source { + from = "connection" + } + } + + output { + metrics = [otelcol.processor.transform.default.input] + logs = [otelcol.processor.transform.default.input] + } +} + +otelcol.processor.transform "default" { + // Grafana Cloud Kubernetes monitoring expects Loki labels `cluster`, `pod`, and `namespace` + error_mode = "ignore" + metric_statements { + context = "resource" + statements = [ + "set(attributes[\"k8s.cluster.name\"], \"rules-sync\") where attributes[\"k8s.cluster.name\"] == nil", + ] + } + log_statements { + context = "resource" + statements = [ + "set(attributes[\"pod\"], attributes[\"k8s.pod.name\"])", + "set(attributes[\"namespace\"], attributes[\"k8s.namespace.name\"])", + "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", + "set(attributes[\"k8s.cluster.name\"], \"rules-sync\") where attributes[\"k8s.cluster.name\"] == nil", + ] + } + output { + metrics = [otelcol.processor.filter.default.input] + logs = [otelcol.processor.filter.default.input] + } +} + +otelcol.processor.filter "default" { + error_mode = "ignore" + + output { + metrics = [otelcol.processor.batch.batch_processor.input] + logs = [otelcol.processor.batch.batch_processor.input] + } +} + +otelcol.processor.batch "batch_processor" { + send_batch_size = 16384 + send_batch_max_size = 0 + timeout = "2s" + output { + metrics = [otelcol.exporter.prometheus.metrics_converter.input] + logs = [otelcol.exporter.loki.logs_converter.input] + } +} +otelcol.exporter.prometheus "metrics_converter" { + forward_to = [prometheus.relabel.metrics_service.receiver] +} +otelcol.exporter.loki "logs_converter" { + forward_to = [loki.process.pod_logs.receiver] +} +// Annotation Autodiscovery +discovery.relabel "annotation_autodiscovery_pods" { + targets = discovery.kubernetes.pods.targets + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_scrape"] + regex = "true" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_job"] + action = "replace" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_instance"] + action = "replace" + target_label = "instance" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_path"] + action = "replace" + target_label = "__metrics_path__" + } + + // Choose the pod port + // The discovery generates a target for each declared container port of the pod. + // If the metricsPortName annotation has value, keep only the target where the port name matches the one of the annotation. + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portName"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + action = "keepequal" + target_label = "__tmp_port" + } + + // If the metrics port number annotation has a value, override the target address to use it, regardless whether it is + // one of the declared ports on that Pod. 
+ rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portNumber", "__meta_kubernetes_pod_ip"] + regex = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})" + replacement = "[$2]:$1" // IPv6 + target_label = "__address__" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portNumber", "__meta_kubernetes_pod_ip"] + regex = "(\\d+);((([0-9]+?)(\\.|$)){4})" // IPv4, takes priority over IPv6 when both exists + replacement = "$2:$1" + target_label = "__address__" + } + + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_scheme"] + action = "replace" + target_label = "__scheme__" + } +} + +discovery.relabel "annotation_autodiscovery_services" { + targets = discovery.kubernetes.services.targets + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_scrape"] + regex = "true" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_job"] + action = "replace" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_instance"] + action = "replace" + target_label = "instance" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_path"] + action = "replace" + target_label = "__metrics_path__" + } + + // Choose the service port + rule { + source_labels = ["__meta_kubernetes_service_port_name"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_portName"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_port_name"] + action = "keepequal" + target_label = "__tmp_port" + } + + rule { + source_labels = ["__meta_kubernetes_service_port_number"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_portNumber"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_port_number"] + action = "keepequal" + target_label = "__tmp_port" + } + + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_scheme"] + action = "replace" + target_label = "__scheme__" + } +} + +discovery.relabel "annotation_autodiscovery_http" { + targets = concat(discovery.relabel.annotation_autodiscovery_pods.output, discovery.relabel.annotation_autodiscovery_services.output) + rule { + source_labels = ["__scheme__"] + regex = "https" + action = "drop" + } +} + +discovery.relabel "annotation_autodiscovery_https" { + targets = concat(discovery.relabel.annotation_autodiscovery_pods.output, discovery.relabel.annotation_autodiscovery_services.output) + rule { + source_labels = ["__scheme__"] + regex = "https" + action = "keep" + } +} + +prometheus.scrape "annotation_autodiscovery_http" { + targets = discovery.relabel.annotation_autodiscovery_http.output + honor_labels = true + clustering { + enabled = true + } + forward_to = [prometheus.relabel.annotation_autodiscovery.receiver] +} + +prometheus.scrape "annotation_autodiscovery_https" { + targets = discovery.relabel.annotation_autodiscovery_https.output + honor_labels = true + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config { + insecure_skip_verify = true + } + clustering { + enabled = true + } + forward_to = [prometheus.relabel.annotation_autodiscovery.receiver] +} + +prometheus.relabel 
"annotation_autodiscovery" { + max_cache_size = 100000 + forward_to = [prometheus.relabel.metrics_service.receiver] +} + +// Grafana Alloy +discovery.relabel "alloy" { + targets = discovery.kubernetes.pods.targets + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance"] + regex = "k8smon" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + regex = "alloy.*" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + regex = "http-metrics" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } +} + +prometheus.scrape "alloy" { + job_name = "integrations/alloy" + targets = discovery.relabel.alloy.output + scrape_interval = "60s" + forward_to = [prometheus.relabel.alloy.receiver] + clustering { + enabled = true + } +} + +prometheus.relabel "alloy" { + max_cache_size = 100000 + rule { + source_labels = ["__name__"] + regex = "up|alloy_build_info" + action = "keep" + } + forward_to = [prometheus.relabel.metrics_service.receiver] +} + +// Kubernetes Monitoring Telemetry +prometheus.exporter.unix "kubernetes_monitoring_telemetry" { + set_collectors = ["textfile"] + textfile { + directory = "/etc/kubernetes-monitoring-telemetry" + } +} + +prometheus.scrape "kubernetes_monitoring_telemetry" { + job_name = "integrations/kubernetes/kubernetes_monitoring_telemetry" + targets = prometheus.exporter.unix.kubernetes_monitoring_telemetry.targets + scrape_interval = "60s" + clustering { + enabled = true + } + forward_to = [prometheus.relabel.kubernetes_monitoring_telemetry.receiver] +} + +prometheus.relabel "kubernetes_monitoring_telemetry" { + max_cache_size = 100000 + rule { + target_label = "job" + action = "replace" + replacement = "integrations/kubernetes/kubernetes_monitoring_telemetry" + } + rule { + target_label = "instance" + action = "replace" + replacement = "k8smon" + } + rule { + source_labels = ["__name__"] + regex = "up|grafana_kubernetes_monitoring_.*" + action = "keep" + } + forward_to = [prometheus.relabel.metrics_service.receiver] +} + +// Kubelet +discovery.relabel "kubelet" { + targets = discovery.kubernetes.nodes.targets +} + +prometheus.scrape "kubelet" { + job_name = "integrations/kubernetes/kubelet" + targets = discovery.relabel.kubelet.output + scheme = "https" + scrape_interval = "60s" + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config { + insecure_skip_verify = true + } + clustering { + enabled = true + } + forward_to = [prometheus.relabel.kubelet.receiver] +} + +prometheus.relabel "kubelet" { + max_cache_size = 100000 + rule { + source_labels = ["__name__"] + regex = 
"up|container_cpu_usage_seconds_total|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_used|kubernetes_build_info|namespace_workload_pod|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes" + action = "keep" + } + forward_to = [prometheus.relabel.metrics_service.receiver] +} + +// cAdvisor +discovery.relabel "cadvisor" { + targets = discovery.kubernetes.nodes.targets + rule { + replacement = "/metrics/cadvisor" + target_label = "__metrics_path__" + } +} + +prometheus.scrape "cadvisor" { + job_name = "integrations/kubernetes/cadvisor" + targets = discovery.relabel.cadvisor.output + scheme = "https" + scrape_interval = "60s" + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config { + insecure_skip_verify = true + } + clustering { + enabled = true + } + forward_to = [prometheus.relabel.cadvisor.receiver] +} + +prometheus.relabel "cadvisor" { + max_cache_size = 100000 + rule { + source_labels = ["__name__"] + regex = "up|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes" + action = "keep" + } + // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","container"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*)@" + action = "drop" + } + // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","image"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@" + action = "drop" + } + // Normalizing unimportant labels (not deleting to continue satisfying