From f59bc188137e4d7d75da85202609c1608eabe9ad Mon Sep 17 00:00:00 2001 From: Pete Wall Date: Thu, 14 Nov 2024 12:12:09 -0700 Subject: [PATCH 1/2] Create a platform test that sends data to Grafana Cloud and tests if all panel data is being set Signed-off-by: Pete Wall --- .../k8s-monitoring/.envrc | 4 + .../k8s-monitoring/Makefile | 2 + .../grafana-cloud-credentials.yaml | 12 +++ .../k8s-monitoring/test-manifest.yaml | 8 ++ .../k8s-monitoring/test-values.yaml | 10 +++ .../k8s-monitoring/test-variables.yaml | 8 ++ .../k8s-monitoring/values.yaml | 74 +++++++++++++++++++ 7 files changed, 118 insertions(+) create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.envrc create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/Makefile create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/grafana-cloud-credentials.yaml create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-manifest.yaml create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-values.yaml create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-variables.yaml create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/values.yaml diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.envrc b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.envrc new file mode 100644 index 000000000..653338a52 --- /dev/null +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.envrc @@ -0,0 +1,4 @@ +export GRAFANA_CLOUD_METRICS_USERNAME=$(op --account grafana.1password.com read "op://Kubernetes Monitoring/helmchart Prometheus/username") +export GRAFANA_CLOUD_LOGS_USERNAME=$(op --account grafana.1password.com read "op://Kubernetes Monitoring/helmchart Loki/username") +export GRAFANA_CLOUD_RW_POLICY_TOKEN=$(op --account grafana.1password.com read "op://Kubernetes Monitoring/helmchart Loki/password") +export RANDOM_NUMBER=$(shuf -i 100000-999999 -n 1) \ No newline at end of file diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/Makefile b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/Makefile new file mode 100644 index 000000000..3edf5ba77 --- /dev/null +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/Makefile @@ -0,0 +1,2 @@ +run: + ../../../../../../scripts/run-integration-test.sh . \ No newline at end of file diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/grafana-cloud-credentials.yaml b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/grafana-cloud-credentials.yaml new file mode 100644 index 000000000..ec9980003 --- /dev/null +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/grafana-cloud-credentials.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: Secret +metadata: + name: grafana-cloud-credentials +stringData: + PROMETHEUS_URL: "https://prometheus-prod-13-prod-us-east-0.grafana.net/api/prom/api/v1/query" + PROMETHEUS_USER: "$GRAFANA_CLOUD_METRICS_USERNAME" + PROMETHEUS_PASS: "$GRAFANA_CLOUD_RW_POLICY_TOKEN" + LOKI_URL: "https://logs-prod-006.grafana.net/loki/api/v1/query" + LOKI_USER: "$GRAFANA_CLOUD_LOGS_USERNAME" + LOKI_PASS: "$GRAFANA_CLOUD_RW_POLICY_TOKEN" diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-manifest.yaml b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-manifest.yaml new file mode 100644 index 000000000..e0ea923c3 --- /dev/null +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-manifest.yaml @@ -0,0 +1,8 @@ +--- +prerequisites: + - type: manifest + name: grafana-cloud-credentials + file: charts/k8s-monitoring/tests/platform/remote-config/grafana-cloud-credentials.yaml + - type: manifest + name: test-variables + file: charts/k8s-monitoring/tests/platform/remote-config/test-variables.yaml diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-values.yaml b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-values.yaml new file mode 100644 index 000000000..a14c8d9f4 --- /dev/null +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-values.yaml @@ -0,0 +1,10 @@ +--- +tests: + - envFrom: + - secretRef: + name: grafana-cloud-credentials + - configMapRef: + name: test-variables + queries: + - query: alloy_build_info{cluster="k8s-monitoring-gc-feature-test", random="$RANDOM_NUMBER"} + type: promql diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-variables.yaml b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-variables.yaml new file mode 100644 index 000000000..6ba18e56e --- /dev/null +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-variables.yaml @@ -0,0 +1,8 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: test-variables +data: + CLUSTER: "k8s-monitoring-gc-feature-test" + RANDOM_NUMBER: "$RANDOM_NUMBER" diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/values.yaml b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/values.yaml new file mode 100644 index 000000000..160c7aa88 --- /dev/null +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/values.yaml @@ -0,0 +1,74 @@ +--- +cluster: + name: k8s-monitoring-gc-feature-test + +destinations: + - name: grafanaCloudMetrics + type: prometheus + url: https://prometheus-prod-13-prod-us-east-0.grafana.net/api/prom/push + auth: + type: basic + usernameKey: PROMETHEUS_USER + passwordKey: PROMETHEUS_PASS + secret: + create: false + name: grafana-cloud-credentials + extraLabelsFrom: + random: env("RANDOM_NUMBER") + - name: grafanaCloudLogs + type: loki + url: https://logs-prod-006.grafana.net/loki/api/v1/push + auth: + type: basic + usernameKey: LOKI_USER + passwordKey: LOKI_PASS + secret: + create: false + name: grafana-cloud-credentials + extraLabelsFrom: + random: env("RANDOM_NUMBER") + +clusterMetrics: + enabled: true + kepler: + enabled: true + +clusterEvents: + enabled: true + +podLogs: + enabled: true + +integrations: + alloy: + instances: + - name: alloy-metrics + labelSelectors: + app.kubernetes.io/name: alloy-metrics + - name: alloy-singleton + labelSelectors: + app.kubernetes.io/name: alloy-singleton + - name: alloy-logs + labelSelectors: + app.kubernetes.io/name: alloy-logs + +alloy-metrics: + enabled: true + alloy: + envFrom: + - configMapRef: + name: test-variables + +alloy-singleton: + enabled: true + alloy: + envFrom: + - configMapRef: + name: test-variables + +alloy-logs: + enabled: true + alloy: + envFrom: + - configMapRef: + name: test-variables From af1f27ee76d0d2f00534000d49c74696154241e5 Mon Sep 17 00:00:00 2001 From: Pete Wall Date: Mon, 6 Jan 2025 13:15:49 -0600 Subject: [PATCH 2/2] Update test to work in the new system Signed-off-by: Pete Wall --- .../k8s-monitoring/.gitignore | 2 + .../k8s-monitoring/.rendered/output.yaml | 4519 +++++++++++++++++ .../k8s-monitoring/Makefile | 21 +- .../deployments/query-test.yaml | 84 + .../grafana-cloud-credentials.yaml | 12 - .../k8s-monitoring/test-manifest.yaml | 8 - .../k8s-monitoring/test-values.yaml | 10 - .../k8s-monitoring/test-variables.yaml | 8 - .../k8s-monitoring/values.yaml | 40 +- scripts/run-cluster-test.sh | 5 +- 10 files changed, 4651 insertions(+), 58 deletions(-) create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.gitignore create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.rendered/output.yaml create mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/deployments/query-test.yaml delete mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/grafana-cloud-credentials.yaml delete mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-manifest.yaml delete mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-values.yaml delete mode 100644 charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/test-variables.yaml diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.gitignore b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.gitignore new file mode 100644 index 000000000..a9f8e80bd --- /dev/null +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.gitignore @@ -0,0 +1,2 @@ +deployments/grafana-cloud-credentials.yaml +deployments/test-variables.yaml diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.rendered/output.yaml b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.rendered/output.yaml new file mode 100644 index 000000000..17c705e4f --- /dev/null +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/.rendered/output.yaml @@ -0,0 +1,4519 @@ +--- +# Source: k8s-monitoring/charts/alloy-logs/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k8smon-alloy-logs + namespace: default + labels: + helm.sh/chart: alloy-logs-0.10.1 + app.kubernetes.io/name: alloy-logs + app.kubernetes.io/instance: k8smon + + app.kubernetes.io/version: "v1.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: alloy + app.kubernetes.io/component: rbac +--- +# Source: k8s-monitoring/charts/alloy-metrics/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k8smon-alloy-metrics + namespace: default + labels: + helm.sh/chart: alloy-metrics-0.10.1 + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + + app.kubernetes.io/version: "v1.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: alloy + app.kubernetes.io/component: rbac +--- +# Source: k8s-monitoring/charts/alloy-singleton/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k8smon-alloy-singleton + namespace: default + labels: + helm.sh/chart: alloy-singleton-0.10.1 + app.kubernetes.io/name: alloy-singleton + app.kubernetes.io/instance: k8smon + + app.kubernetes.io/version: "v1.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: alloy + app.kubernetes.io/component: rbac +--- +# Source: k8s-monitoring/charts/clusterMetrics/charts/kepler/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k8smon-kepler + namespace: default + labels: + helm.sh/chart: kepler-0.5.12 + app.kubernetes.io/name: kepler + app.kubernetes.io/component: exporter + app.kubernetes.io/version: "release-0.7.12" + app.kubernetes.io/managed-by: Helm +--- +# Source: k8s-monitoring/charts/clusterMetrics/charts/kube-state-metrics/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +automountServiceAccountToken: true +metadata: + labels: + helm.sh/chart: kube-state-metrics-5.28.0 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: k8smon + app.kubernetes.io/version: "2.14.0" + release: k8smon + name: k8smon-kube-state-metrics + namespace: default +--- +# Source: k8s-monitoring/charts/clusterMetrics/charts/node-exporter/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k8smon-node-exporter + namespace: default + labels: + helm.sh/chart: node-exporter-4.43.1 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: node-exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/instance: k8smon + app.kubernetes.io/version: "1.8.2" + release: k8smon +automountServiceAccountToken: false +--- +# Source: k8s-monitoring/charts/clusterMetrics/charts/opencost/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k8smon-opencost + namespace: default + labels: + helm.sh/chart: opencost-1.43.0 + app.kubernetes.io/name: opencost + app.kubernetes.io/instance: k8smon + app.kubernetes.io/version: "1.113.0" + app.kubernetes.io/part-of: opencost + app.kubernetes.io/managed-by: Helm +automountServiceAccountToken: true +--- +# Source: k8s-monitoring/charts/clusterMetrics/charts/windows-exporter/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k8smon-windows-exporter + namespace: default + labels: + helm.sh/chart: windows-exporter-0.7.1 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: windows-exporter + app.kubernetes.io/name: windows-exporter + app.kubernetes.io/instance: k8smon + app.kubernetes.io/version: "0.29.2" + release: k8smon +--- +# Source: k8s-monitoring/charts/clusterMetrics/charts/windows-exporter/templates/config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: k8smon-windows-exporter + namespace: default + labels: + helm.sh/chart: windows-exporter-0.7.1 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: metrics + app.kubernetes.io/part-of: windows-exporter + app.kubernetes.io/name: windows-exporter + app.kubernetes.io/instance: k8smon + app.kubernetes.io/version: "0.29.2" + release: k8smon +data: + config.yml: | + collectors: + enabled: cpu,cs,container,logical_disk,memory,net,os + collector: + service: + services-where: "Name='containerd' or Name='kubelet'" +--- +# Source: k8s-monitoring/templates/alloy-config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: k8smon-alloy-metrics + namespace: default +data: + config.alloy: |- + // Destination: grafanaCloudMetrics (prometheus) + otelcol.exporter.prometheus "grafanacloudmetrics" { + add_metric_suffixes = true + forward_to = [prometheus.remote_write.grafanacloudmetrics.receiver] + } + + prometheus.remote_write "grafanacloudmetrics" { + endpoint { + url = "https://prometheus-prod-13-prod-us-east-0.grafana.net/api/prom/push" + headers = { + "X-Scope-OrgID" = nonsensitive(remote.kubernetes.secret.grafanacloudmetrics.data["tenantId"]), + } + basic_auth { + username = nonsensitive(remote.kubernetes.secret.grafanacloudmetrics.data["PROMETHEUS_USER"]) + password = remote.kubernetes.secret.grafanacloudmetrics.data["PROMETHEUS_PASS"] + } + tls_config { + insecure_skip_verify = false + ca_pem = nonsensitive(remote.kubernetes.secret.grafanacloudmetrics.data["ca"]) + cert_pem = nonsensitive(remote.kubernetes.secret.grafanacloudmetrics.data["cert"]) + key_pem = remote.kubernetes.secret.grafanacloudmetrics.data["key"] + } + send_native_histograms = false + + queue_config { + capacity = 10000 + min_shards = 1 + max_shards = 50 + max_samples_per_send = 2000 + batch_send_deadline = "5s" + min_backoff = "30ms" + max_backoff = "5s" + retry_on_http_429 = true + sample_age_limit = "0s" + } + + write_relabel_config { + source_labels = ["cluster"] + regex = "" + replacement = "k8s-monitoring-gc-feature-test" + target_label = "cluster" + } + write_relabel_config { + source_labels = ["k8s.cluster.name"] + regex = "" + replacement = "k8s-monitoring-gc-feature-test" + target_label = "cluster" + } + } + + wal { + truncate_frequency = "2h" + min_keepalive_time = "5m" + max_keepalive_time = "8h" + } + } + + remote.kubernetes.secret "grafanacloudmetrics" { + name = "grafana-cloud-credentials" + namespace = "default" + } + + // Feature: Cluster Metrics + declare "cluster_metrics" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + remote.kubernetes.configmap "kubernetes" { + name = "k8smon-alloy-module-kubernetes" + namespace = "default" + } + + import.string "kubernetes" { + content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] + } + + kubernetes.kubelet "scrape" { + clustering = true + keep_metrics = "up|go_goroutines|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_free|kubelet_volume_stats_inodes_used|kubelet_volume_stats_used_bytes|kubernetes_build_info|namespace_workload_pod|process_cpu_seconds_total|process_resident_memory_bytes|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes|scrape_samples_scraped" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + + kubernetes.resources "scrape" { + clustering = true + job_label = "integrations/kubernetes/resources" + keep_metrics = "up|node_cpu_usage_seconds_total|node_memory_working_set_bytes|scrape_samples_scraped" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + + kubernetes.cadvisor "scrape" { + clustering = true + keep_metrics = "up|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes|scrape_samples_scraped" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = [prometheus.relabel.cadvisor.receiver] + } + + prometheus.relabel "cadvisor" { + max_cache_size = 100000 + // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","container"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*)@" + action = "drop" + } + // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","image"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@" + action = "drop" + } + // Normalizing unimportant labels (not deleting to continue satisfying