From f504c8056d4d438d1ba0fa1a2e4d96cfdc93ec0d Mon Sep 17 00:00:00 2001 From: Vishnu Challa Date: Fri, 15 Sep 2023 20:12:37 -0400 Subject: [PATCH 1/4] Added: Support for multiple folders, python client Modified: Internal logic for creating/updating dashboards Deleted: Removed old logic as it is not flexible for automation --- .github/workflows/ci.yml | 30 +- .github/workflows/grafana.yml | 15 +- Dockerfile | 21 + Makefile | 9 +- dittybopper/README.md | 4 +- dittybopper/deploy.sh | 4 +- dittybopper/k8s-deploy.sh | 1 + dittybopper/syncer/Dockerfile | 7 - dittybopper/syncer/entrypoint.py | 129 + dittybopper/syncer/entrypoint.sh | 22 - .../templates/dittybopper.yaml.template | 8 +- .../templates/k8s-dittybopper.yaml.template | 8 +- requirements.txt | 2 + templates/{ => CPT}/k8s-perf.jsonnet | 2 +- templates/{ => CPT}/kube-burner.jsonnet | 2 +- .../api-performance-overview.jsonnet | 2 +- .../{ => General}/cilium-k8s-perf.jsonnet | 2 +- .../etcd-on-cluster-dashboard.jsonnet | 2 +- .../hypershift-performance.jsonnet | 2 +- templates/General/k8s-perf.jsonnet | 499 ++ templates/General/kube-burner.jsonnet | 4568 +++++++++++++++++ .../{ => General}/ocp-performance.jsonnet | 2 +- templates/{ => General}/ovn-dashboard.jsonnet | 2 +- .../{ => General}/pgbench-dashboard.jsonnet | 2 +- templates/{ => General}/uperf-perf.jsonnet | 2 +- .../{ => General}/vegeta-wrapper.jsonnet | 2 +- templates/{ => General}/ycsb.jsonnet | 2 +- 27 files changed, 5268 insertions(+), 83 deletions(-) create mode 100644 Dockerfile delete mode 100644 dittybopper/syncer/Dockerfile create mode 100644 dittybopper/syncer/entrypoint.py delete mode 100755 dittybopper/syncer/entrypoint.sh create mode 100644 requirements.txt rename templates/{ => CPT}/k8s-perf.jsonnet (99%) rename templates/{ => CPT}/kube-burner.jsonnet (99%) rename templates/{ => General}/api-performance-overview.jsonnet (99%) rename templates/{ => General}/cilium-k8s-perf.jsonnet (99%) rename templates/{ => 
General}/etcd-on-cluster-dashboard.jsonnet (99%) rename templates/{ => General}/hypershift-performance.jsonnet (99%) create mode 100644 templates/General/k8s-perf.jsonnet create mode 100644 templates/General/kube-burner.jsonnet rename templates/{ => General}/ocp-performance.jsonnet (99%) rename templates/{ => General}/ovn-dashboard.jsonnet (99%) rename templates/{ => General}/pgbench-dashboard.jsonnet (98%) rename templates/{ => General}/uperf-perf.jsonnet (99%) rename templates/{ => General}/vegeta-wrapper.jsonnet (98%) rename templates/{ => General}/ycsb.jsonnet (99%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 48d7fa1..db020a7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,6 +17,19 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: + lint: + runs-on: ubuntu-latest + + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + + - name: Get dependencies + run: make deps + + - name: Run jsonnetfmt + run: make format + build: runs-on: ubuntu-latest @@ -35,23 +48,10 @@ jobs: - name: Import dashboards to grafana run: > - for t in rendered/*.json; do + for t in rendered/**/*.json; do echo "Importing ${t}"; dashboard=$(cat ${t}); echo "{\"dashboard\": ${dashboard}, \"overwrite\": true}" | curl -k -Ss -XPOST -H "Content-Type: application/json" -H "Accept: application/json" -d@- "http://admin:admin@localhost:3000/api/dashboards/db" -o /dev/null; - done - - lint: - runs-on: ubuntu-latest - - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 - - - name: Get dependencies - run: make deps - - - name: Run jsonnetfmt - run: for t in templates/*.jsonnet; do echo "Testing template ${t}"; ./bin/jsonnetfmt --test $t; echo 'Results:' ${?}; done + done \ No newline at end of file diff --git a/.github/workflows/grafana.yml 
b/.github/workflows/grafana.yml index f87ed2b..0618d03 100644 --- a/.github/workflows/grafana.yml +++ b/.github/workflows/grafana.yml @@ -3,10 +3,6 @@ defaults: run: shell: bash -env: - # Space separated list as a string of all dashboard json files in "rendered" to load - DASHBOARDS: "kube-burner.json" - on: push: branches: [ master ] @@ -25,13 +21,10 @@ jobs: # The secret GRAFANA_URL must be set with the format http://username:password@url.org without a trailing / - name: Import dashboards to grafana run: > - dashboard_list=($(echo $DASHBOARDS)); - for path in "${dashboard_list[@]}"; do - full_path="rendered/${path}"; - echo "Importing ${full_path}"; - dashboard=$(cat ${full_path}); + for t in rendered/**/*.json; do + echo "Importing ${t}"; + dashboard=$(cat ${t}); echo "{\"dashboard\": ${dashboard}, \"overwrite\": true}" | curl -k -Ss -XPOST -H "Content-Type: application/json" -H "Accept: application/json" -d@- "${{ secrets.GRAFANA_URL }}/api/dashboards/db" -o /dev/null; - done - + done \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..98f1f46 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +FROM registry.access.redhat.com/ubi8/ubi-minimal + +# Set the working directory +WORKDIR /performance-dashboards + +# Install necessary libraries for subsequent commands +RUN microdnf install -y podman python3 python3-pip && \ + microdnf clean all && \ + rm -rf /var/cache/yum + +COPY . . 
+ +# Set permissions +RUN chmod -R 775 /performance-dashboards + +# Install dependencies +RUN pip3 install --upgrade pip && \ + pip3 install -r requirements.txt + +# Start the command +CMD ["python3", "dittybopper/syncer/entrypoint.py"] \ No newline at end of file diff --git a/Makefile b/Makefile index 479a16b..2b6a8ff 100644 --- a/Makefile +++ b/Makefile @@ -7,10 +7,10 @@ SYNCER_IMG_TAG ?= quay.io/cloud-bulldozer/dittybopper-syncer:latest PLATFORM = linux/amd64,linux/arm64,linux/ppc64le,linux/s390x # Get all templates at $(TEMPLATESDIR) -TEMPLATES = $(wildcard $(TEMPLATESDIR)/*.jsonnet) +TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*.jsonnet) # Replace $(TEMPLATESDIR)/*.jsonnet by $(OUTPUTDIR)/*.json -outputs = $(patsubst $(TEMPLATESDIR)/%.jsonnet, $(OUTPUTDIR)/%.json, $(TEMPLATES)) +outputs := $(patsubst $(TEMPLATESDIR)/%.jsonnet, $(OUTPUTDIR)/%.json, $(TEMPLATES)) all: deps format build @@ -38,10 +38,11 @@ $(BINDIR)/jsonnet: # Build each template and output to $(OUTPUTDIR) $(OUTPUTDIR)/%.json: $(TEMPLATESDIR)/%.jsonnet @echo "Building template $<" + mkdir -p $(dir $@) $(BINDIR)/jsonnet $< > $@ build-syncer-image: build - podman build --platform=${PLATFORM} -f dittybopper/syncer/Dockerfile --manifest=${SYNCER_IMG_TAG} . + podman build --platform=${PLATFORM} -f Dockerfile --manifest=${SYNCER_IMG_TAG} . push-syncer-image: - podman manifest push ${SYNCER_IMG_TAG} ${SYNCER_IMG_TAG} + podman manifest push ${SYNCER_IMG_TAG} ${SYNCER_IMG_TAG} \ No newline at end of file diff --git a/dittybopper/README.md b/dittybopper/README.md index ee9fed2..3975a3a 100644 --- a/dittybopper/README.md +++ b/dittybopper/README.md @@ -27,9 +27,9 @@ If using disconnected, you need to sync the cloud-bulldozer grafana image (shown dittybopper/templates/dittybopper.yaml.template file) and your chosen syncer image (defaults to quay.io/cloud-bulldozer/dittybopper-syncer:latest). 
-The syncer image is built with the context at the root of the repository, and the image in the dittybopper/syncer directory. +The syncer image is built with the context at the root of the repository, and the image in the root directory. You can build it with `make build-syncer-image SYNCER_IMG_TAG=container.registry.org/organization/syncer:latest` -Alternatively, you can run the following command form the root folder of this repository: `podman build -f dittybopper/syncer/Dockerfile -t=container.registry.org/organization/syncer:latest .` +Alternatively, you can run the following command from the root folder of this repository: `podman build -f Dockerfile -t=container.registry.org/organization/syncer:latest .` ## Contribute diff --git a/dittybopper/deploy.sh b/dittybopper/deploy.sh index 09d0cde..c90adb4 100755 --- a/dittybopper/deploy.sh +++ b/dittybopper/deploy.sh @@ -40,8 +40,8 @@ END export PROMETHEUS_USER=internal export GRAFANA_ADMIN_PASSWORD=admin -export DASHBOARDS="ocp-performance.json api-performance-overview.json etcd-on-cluster-dashboard.json hypershift-performance.json ovn-dashboard.json" -export SYNCER_IMAGE=${SYNCER_IMAGE:-"quay.io/cloud-bulldozer/dittybopper-syncer:latest"} # Syncer image +export GRAFANA_URL="http://admin:${GRAFANA_ADMIN_PASSWORD}@localhost:3000" +export SYNCER_IMAGE=${SYNCER_IMAGE:-"quay.io/cloud-bulldozer/syncer:latest"} # Syncer image export GRAFANA_IMAGE=${GRAFANA_IMAGE:-"quay.io/cloud-bulldozer/grafana:9.4.3"} # Syncer image # Set defaults for command options diff --git a/dittybopper/k8s-deploy.sh b/dittybopper/k8s-deploy.sh index 47eb7aa..7ba6817 100755 --- a/dittybopper/k8s-deploy.sh +++ b/dittybopper/k8s-deploy.sh @@ -38,6 +38,7 @@ END export PROMETHEUS_USER=internal export GRAFANA_ADMIN_PASSWORD=admin +export GRAFANA_URL="http://admin:${GRAFANA_ADMIN_PASSWORD}@localhost:3000" export DASHBOARDS="k8s-performance.json" export SYNCER_IMAGE=${SYNCER_IMAGE:-"quay.io/cloud-bulldozer/dittybopper-syncer:latest"} # Syncer image 
export GRAFANA_IMAGE=${GRAFANA_IMAGE:-"quay.io/cloud-bulldozer/grafana:9.4.3"} # Syncer image diff --git a/dittybopper/syncer/Dockerfile b/dittybopper/syncer/Dockerfile deleted file mode 100644 index 851b462..0000000 --- a/dittybopper/syncer/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM registry.access.redhat.com/ubi8/ubi-minimal - -WORKDIR /performance-dashboards -COPY dittybopper/syncer/entrypoint.sh /bin/entrypoint.sh -COPY rendered/*.json /performance-dashboards/ -RUN chmod -R 775 /performance-dashboards -ENTRYPOINT ["entrypoint.sh"] diff --git a/dittybopper/syncer/entrypoint.py b/dittybopper/syncer/entrypoint.py new file mode 100644 index 0000000..e9d607a --- /dev/null +++ b/dittybopper/syncer/entrypoint.py @@ -0,0 +1,129 @@ +import json +import logging +import os +import requests +import uuid +import time +from collections import defaultdict + +logging.basicConfig(level=logging.INFO) + + +class GrafanaOperations: + """ + This class is responsible for Grafana operations + """ + def __init__(self, grafana_url: str, input_directory: str): + self.grafana_url = grafana_url + self.input_directory = input_directory + self.dashboards = defaultdict(list) + self.folder_map = dict() + self.logger = logging.getLogger(__name__) + + def fetch_all_dashboards(self): + """ + This method fetches all rendered dashboards + :return: + """ + self.get_all_folders() + self.folder_map['General'] = None + for root, _, files in os.walk(self.input_directory): + folder_name = os.path.basename(root) + json_files = [os.path.join(root, filename) for filename in files if filename.endswith(".json")] + folder_name = "General" if (folder_name == "") else folder_name + if folder_name in self.folder_map: + folder_id = self.folder_map[folder_name] + else: + folder_id = self.create_folder(folder_name) + self.dashboards[folder_id].extend(json_files) + + def get_all_folders(self): + """ + This method gets the entire list of folders in grafana + :return: + """ + headers = { + "Content-Type": 
"application/json", + "Accept": "application/json", + } + try: + response = requests.get( + f"{self.grafana_url}/api/folders", + headers=headers, + ) + response_json = response.json() + self.folder_map = {each_folder['title']: each_folder['id'] for each_folder in response_json} + except requests.exceptions.RequestException as e: + raise Exception(f"Error listing folders. Message: {e}") + + def create_folder(self, folder_name): + """ + This method creates a folder in grafana + :return: + """ + uid = str(uuid.uuid4()) + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + try: + response = requests.post( + f"{self.grafana_url}/api/folders", + headers=headers, + json={ + "title": folder_name, + "uid": uid, + }, + ) + response_json = response.json() + self.folder_map[folder_name] = response_json['id'] + return response_json['id'] + + except requests.exceptions.RequestException as e: + raise Exception(f"Error creating folder with name:'{folder_name}' and uid:'{uid}'. Message: {e}") + + def read_dashboard_json(self, json_file): + """ + This method reads dashboard from json file + :return: + """ + with open(json_file, 'r') as f: + return json.load(f) + + def create_dashboards(self): + """ + This method creates/updates dashboard with new json + :return: + """ + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + for folder_id, files in self.dashboards.items(): + for json_file in set(files): + dashboard_json = self.read_dashboard_json(json_file) + try: + response = requests.post( + f"{self.grafana_url}/api/dashboards/db", + headers=headers, + json={ + "dashboard": dashboard_json, + "folderId": folder_id, + "overwrite": True, + }, + ) + if response.status_code == 200: + self.logger.info(f"Dashboard '{dashboard_json['title']}' created successfully in folder '{folder_id}'") + else: + raise Exception( + f"Failed to create dashboard '{dashboard_json['title']}' in folder '{folder_id}'. Status code: {response.status_code}. 
Message: {response.text}") + + except requests.exceptions.RequestException as e: + raise Exception(f"Error creating dashboard '{dashboard_json['title']}' in folder '{folder_id}'. Message: {e}") + +if __name__ == '__main__': + grafana_operations = GrafanaOperations(os.environ.get("GRAFANA_URL"), os.environ.get("INPUT_DIR")) + grafana_operations.fetch_all_dashboards() + grafana_operations.create_dashboards() + while True: + time.sleep(60) diff --git a/dittybopper/syncer/entrypoint.sh b/dittybopper/syncer/entrypoint.sh deleted file mode 100755 index d674d90..0000000 --- a/dittybopper/syncer/entrypoint.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -while [[ $(curl -s -o /dev/null -w '%{http_code}' http://localhost:3000/api/health) != "200" ]]; do - echo "Grafana still not ready, waiting 5 seconds" - sleep 5 -done - -for d in ${DASHBOARDS}; do - if [[ ! -f $d ]]; then - echo "Dashboard ${d} not found" - continue - else - echo "Importing dashboard $d" - dashboard=$(cat ${d}) - echo "{\"dashboard\": ${dashboard}, \"overwrite\": true}" | \ - curl -Ss -XPOST -H "Content-Type: application/json" -H "Accept: application/json" -d@- \ - "http://admin:${GRAFANA_ADMIN_PASSWORD}@localhost:3000/api/dashboards/db" -o /dev/null - fi -done - -echo "Dittybopper ready" -exec sleep inf diff --git a/dittybopper/templates/dittybopper.yaml.template b/dittybopper/templates/dittybopper.yaml.template index 725cbc0..b9f8945 100644 --- a/dittybopper/templates/dittybopper.yaml.template +++ b/dittybopper/templates/dittybopper.yaml.template @@ -60,10 +60,10 @@ spec: - name: dittybopper-syncer imagePullPolicy: Always env: - - name: GRAFANA_ADMIN_PASSWORD - value: ${GRAFANA_ADMIN_PASSWORD} - - name: DASHBOARDS - value: ${DASHBOARDS} + - name: GRAFANA_URL + value: ${GRAFANA_URL} + - name: INPUT_DIR + value: "/performance-dashboards/rendered/" image: ${SYNCER_IMAGE} volumes: - name: sc-grafana-config diff --git a/dittybopper/templates/k8s-dittybopper.yaml.template 
b/dittybopper/templates/k8s-dittybopper.yaml.template index 282cf69..cffb85a 100644 --- a/dittybopper/templates/k8s-dittybopper.yaml.template +++ b/dittybopper/templates/k8s-dittybopper.yaml.template @@ -48,10 +48,10 @@ spec: - name: dittybopper-syncer imagePullPolicy: Always env: - - name: GRAFANA_ADMIN_PASSWORD - value: ${GRAFANA_ADMIN_PASSWORD} - - name: DASHBOARDS - value: ${DASHBOARDS} + - name: GRAFANA_URL + value: ${GRAFANA_URL} + - name: INPUT_DIR + value: "/performance-dashboards/rendered/" image: ${SYNCER_IMAGE} volumes: - name: sc-grafana-config diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..138d722 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +requests==2.26.0 + diff --git a/templates/k8s-perf.jsonnet b/templates/CPT/k8s-perf.jsonnet similarity index 99% rename from templates/k8s-perf.jsonnet rename to templates/CPT/k8s-perf.jsonnet index d00dcb1..7308819 100644 --- a/templates/k8s-perf.jsonnet +++ b/templates/CPT/k8s-perf.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; diff --git a/templates/kube-burner.jsonnet b/templates/CPT/kube-burner.jsonnet similarity index 99% rename from templates/kube-burner.jsonnet rename to templates/CPT/kube-burner.jsonnet index 5e32c18..cdb5160 100644 --- a/templates/kube-burner.jsonnet +++ b/templates/CPT/kube-burner.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local es = grafana.elasticsearch; local worker_count = grafana.statPanel.new( diff --git a/templates/api-performance-overview.jsonnet b/templates/General/api-performance-overview.jsonnet similarity index 99% rename from templates/api-performance-overview.jsonnet rename to templates/General/api-performance-overview.jsonnet index 77f4db2..246e9ff 
100644 --- a/templates/api-performance-overview.jsonnet +++ b/templates/General/api-performance-overview.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; //Panel definitions diff --git a/templates/cilium-k8s-perf.jsonnet b/templates/General/cilium-k8s-perf.jsonnet similarity index 99% rename from templates/cilium-k8s-perf.jsonnet rename to templates/General/cilium-k8s-perf.jsonnet index 3bcef8b..90c21f0 100644 --- a/templates/cilium-k8s-perf.jsonnet +++ b/templates/General/cilium-k8s-perf.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; diff --git a/templates/etcd-on-cluster-dashboard.jsonnet b/templates/General/etcd-on-cluster-dashboard.jsonnet similarity index 99% rename from templates/etcd-on-cluster-dashboard.jsonnet rename to templates/General/etcd-on-cluster-dashboard.jsonnet index ca52c2e..68bbc9d 100644 --- a/templates/etcd-on-cluster-dashboard.jsonnet +++ b/templates/General/etcd-on-cluster-dashboard.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; // Panel definitions diff --git a/templates/hypershift-performance.jsonnet b/templates/General/hypershift-performance.jsonnet similarity index 99% rename from templates/hypershift-performance.jsonnet rename to templates/General/hypershift-performance.jsonnet index bd321e4..8416234 100644 --- a/templates/hypershift-performance.jsonnet +++ b/templates/General/hypershift-performance.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local 
prometheus = grafana.prometheus; local stat = grafana.statPanel; diff --git a/templates/General/k8s-perf.jsonnet b/templates/General/k8s-perf.jsonnet new file mode 100644 index 0000000..7308819 --- /dev/null +++ b/templates/General/k8s-perf.jsonnet @@ -0,0 +1,499 @@ +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; +local prometheus = grafana.prometheus; + + +// Helper functions + +local genericGraphPanel(title, format) = grafana.graphPanel.new( + title=title, + datasource='$datasource', + format=format, + nullPointMode='null as zero', + sort='decreasing', + legend_alignAsTable=true, +); + +local genericGraphLegendPanel(title, format) = grafana.graphPanel.new( + title=title, + datasource='$datasource', + format=format, + legend_values=true, + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_hideEmpty=true, + legend_hideZero=true, + legend_sort='max', + nullPointMode='null as zero', + sort='decreasing', +); + + +local nodeMemory(nodeName) = genericGraphLegendPanel('System Memory: ' + nodeName, 'bytes').addTarget( + prometheus.target( + 'node_memory_Active_bytes{node=~"' + nodeName + '"}', + legendFormat='Active', + ) +).addTarget( + prometheus.target( + 'node_memory_MemTotal_bytes{node=~"' + nodeName + '"}', + legendFormat='Total', + ) +).addTarget( + prometheus.target( + 'node_memory_Cached_bytes{node=~"' + nodeName + '"} + node_memory_Buffers_bytes{node=~"' + nodeName + '"}', + legendFormat='Cached + Buffers', + ) +).addTarget( + prometheus.target( + 'node_memory_MemAvailable_bytes{node=~"' + nodeName + '"}', + legendFormat='Available', + ) +); + + +local nodeCPU(nodeName) = genericGraphLegendPanel('CPU Basic: ' + nodeName, 'percent').addTarget( + prometheus.target( + 'sum by (instance, mode)(rate(node_cpu_seconds_total{node=~"' + nodeName + '",job=~".*"}[$interval])) * 100', + legendFormat='Busy {{mode}}', + ) +); + + +local diskThroughput(nodeName) = genericGraphLegendPanel('Disk throughput: ' + nodeName, 
'Bps').addTarget( + prometheus.target( + 'rate(node_disk_read_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', + legendFormat='{{ device }} - read', + ) +).addTarget( + prometheus.target( + 'rate(node_disk_written_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', + legendFormat='{{ device }} - write', + ) +); + +local diskIOPS(nodeName) = genericGraphLegendPanel('Disk IOPS: ' + nodeName, 'iops').addTarget( + prometheus.target( + 'rate(node_disk_reads_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', + legendFormat='{{ device }} - read', + ) +).addTarget( + prometheus.target( + 'rate(node_disk_writes_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', + legendFormat='{{ device }} - write', + ) +); + +local networkUtilization(nodeName) = genericGraphLegendPanel('Network Utilization: ' + nodeName, 'bps').addTarget( + prometheus.target( + 'rate(node_network_receive_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', + legendFormat='{{instance}} - {{device}} - RX', + ) +).addTarget( + prometheus.target( + 'rate(node_network_transmit_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', + legendFormat='{{instance}} - {{device}} - TX', + ) +); + +local networkPackets(nodeName) = genericGraphLegendPanel('Network Packets: ' + nodeName, 'pps').addTarget( + prometheus.target( + 'rate(node_network_receive_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])', + legendFormat='{{instance}} - {{device}} - RX', + ) +).addTarget( + prometheus.target( + 'rate(node_network_transmit_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])', + legendFormat='{{instance}} - {{device}} - TX', + ) +); + +local networkDrop(nodeName) = genericGraphLegendPanel('Network packets drop: ' + nodeName, 'pps').addTarget( + prometheus.target( + 'topk(10, rate(node_network_receive_drop_total{node=~"' + 
nodeName + '"}[$interval]))', + legendFormat='rx-drop-{{ device }}', + ) +).addTarget( + prometheus.target( + 'topk(10,rate(node_network_transmit_drop_total{node=~"' + nodeName + '"}[$interval]))', + legendFormat='tx-drop-{{ device }}', + ) +); + +local conntrackStats(nodeName) = genericGraphLegendPanel('Conntrack stats: ' + nodeName, '') + { + seriesOverrides: [{ + alias: 'conntrack_limit', + yaxis: 2, + }], + yaxes: [{ show: true }, { show: true }], +} + .addTarget( + prometheus.target( + 'node_nf_conntrack_entries{node=~"' + nodeName + '"}', + legendFormat='conntrack_entries', + ) +).addTarget( + prometheus.target( + 'node_nf_conntrack_entries_limit{node=~"' + nodeName + '"}', + legendFormat='conntrack_limit', + ) +); + +local top10ContainerCPU(nodeName) = genericGraphLegendPanel('Top 10 container CPU: ' + nodeName, 'percent').addTarget( + prometheus.target( + 'topk(10, sum(irate(container_cpu_usage_seconds_total{container!="POD",name!="",instance=~"' + nodeName + '",namespace!="",namespace=~"$namespace"}[$interval])) by (pod,container,namespace,name,service) * 100)', + legendFormat='{{ pod }}: {{ container }}', + ) +); + +local top10ContainerRSS(nodeName) = genericGraphLegendPanel('Top 10 container RSS: ' + nodeName, 'bytes').addTarget( + prometheus.target( + 'topk(10, container_memory_rss{container!="POD",name!="",instance=~"' + nodeName + '",namespace!="",namespace=~"$namespace"})', + legendFormat='{{ pod }}: {{ container }}', + ) +); + +local containerWriteBytes(nodeName) = genericGraphLegendPanel('Container fs write rate: ' + nodeName, 'Bps').addTarget( + prometheus.target( + 'sum(rate(container_fs_writes_bytes_total{device!~".+dm.+", node=~"' + nodeName + '", container!=""}[$interval])) by (device, container)', + legendFormat='{{ container }}: {{ device }}', + ) +); + +// Individual panel definitions + +// Monitoring Stack + +local promReplMemUsage = genericGraphLegendPanel('Prometheus Replica Memory usage', 'bytes').addTarget( + prometheus.target( + 
'sum(container_memory_rss{pod="prometheus-k8s-1",namespace!="",name!="",container="prometheus"}) by (pod)', + legendFormat='{{pod}}', + ) +).addTarget( + prometheus.target( + 'sum(container_memory_rss{pod="prometheus-k8s-0",namespace!="",name!="",container="prometheus"}) by (pod)', + legendFormat='{{pod}}', + ) +); + +// Kubelet + +local kubeletCPU = genericGraphLegendPanel('Top 10 Kubelet CPU usage', 'percent').addTarget( + prometheus.target( + 'topk(10,rate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[$interval])*100)', + legendFormat='kubelet - {{node}}', + ) +); + +local crioCPU = genericGraphLegendPanel('Top 10 crio CPU usage', 'percent').addTarget( + prometheus.target( + 'topk(10,rate(process_cpu_seconds_total{service="kubelet",job="crio"}[$interval])*100)', + legendFormat='crio - {{node}}', + ) +); + +local kubeletMemory = genericGraphLegendPanel('Top 10 Kubelet memory usage', 'bytes').addTarget( + prometheus.target( + 'topk(10,process_resident_memory_bytes{service="kubelet",job="kubelet"})', + legendFormat='kubelet - {{node}}', + ) +); + +local crioMemory = genericGraphLegendPanel('Top 10 crio memory usage', 'bytes').addTarget( + prometheus.target( + 'topk(10,process_resident_memory_bytes{service="kubelet",job="crio"})', + legendFormat='crio - {{node}}', + ) +); + +// Cluster details + +local current_node_count = grafana.statPanel.new( + title='Current Node Count', + datasource='$datasource', + reducerFunction='last', +).addTarget( + prometheus.target( + 'sum(kube_node_info{})', + legendFormat='Number of nodes', + ) +).addTarget( + prometheus.target( + 'sum(kube_node_status_condition{status="true"}) by (condition) > 0', + legendFormat='Node: {{ condition }}', + ) +); + +local current_namespace_count = grafana.statPanel.new( + title='Current namespace Count', + datasource='$datasource', + reducerFunction='last', +).addTarget( + prometheus.target( + 'sum(kube_namespace_status_phase) by (phase)', + legendFormat='{{ phase }}', + ) +); + +local 
current_pod_count = grafana.statPanel.new( + title='Current Pod Count', + reducerFunction='last', + datasource='$datasource', +).addTarget( + prometheus.target( + 'sum(kube_pod_status_phase{}) by (phase) > 0', + legendFormat='{{ phase}} Pods', + ) +); + +local nodeCount = genericGraphPanel('Number of nodes', 'none').addTarget( + prometheus.target( + 'sum(kube_node_info{})', + legendFormat='Number of nodes', + ) +).addTarget( + prometheus.target( + 'sum(kube_node_status_condition{status="true"}) by (condition) > 0', + legendFormat='Node: {{ condition }}', + ) +); + +local nsCount = genericGraphPanel('Namespace count', 'none').addTarget( + prometheus.target( + 'sum(kube_namespace_status_phase) by (phase) > 0', + legendFormat='{{ phase }} namespaces', + ) +); + +local podCount = genericGraphPanel('Pod count', 'none').addTarget( + prometheus.target( + 'sum(kube_pod_status_phase{}) by (phase)', + legendFormat='{{phase}} pods', + ) +); + +local secretCmCount = genericGraphPanel('Secret & configmap count', 'none').addTarget( + prometheus.target( + 'count(kube_secret_info{})', + legendFormat='secrets', + ) +).addTarget( + prometheus.target( + 'count(kube_configmap_info{})', + legendFormat='Configmaps', + ) +); + +local deployCount = genericGraphPanel('Deployment count', 'none').addTarget( + prometheus.target( + 'count(kube_deployment_labels{})', + legendFormat='Deployments', + ) +); + + +local servicesCount = genericGraphPanel('Services count', 'none').addTarget( + prometheus.target( + 'count(kube_service_info{})', + legendFormat='Services', + ) +); + +local alerts = genericGraphPanel('Alerts', 'none').addTarget( + prometheus.target( + 'topk(10,sum(ALERTS{severity!="none"}) by (alertname, severity))', + legendFormat='{{severity}}: {{alertname}}', + ) +); + +local top10ContMem = genericGraphLegendPanel('Top 10 container RSS', 'bytes').addTarget( + prometheus.target( + 'topk(10, container_memory_rss{namespace!="",container!="POD",name!=""})', + legendFormat='{{ namespace }} 
- {{ name }}', + ) +); + +local podDistribution = genericGraphLegendPanel('Pod Distribution', 'none').addTarget( + prometheus.target( + 'count(kube_pod_info{}) by (exported_node)', + legendFormat='{{ node }}', + ) +); + +local top10ContCPU = genericGraphLegendPanel('Top 10 container CPU', 'percent').addTarget( + prometheus.target( + 'topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[$interval])*100)', + legendFormat='{{ namespace }} - {{ name }}', + ) +); + + +local goroutines_count = genericGraphPanel('Goroutines count', 'none').addTarget( + prometheus.target( + 'topk(10, sum(go_goroutines{}) by (job,instance))', + legendFormat='{{ job }} - {{ instance }}', + ) +); + +// Cluster operators + +local clusterOperatorsOverview = grafana.statPanel.new( + datasource='$datasource', + title='Cluster operators overview', +).addTarget( + prometheus.target( + 'sum by (condition)(cluster_operator_conditions{condition!=""})', + legendFormat='{{ condition }}', + ) +); + +local clusterOperatorsInformation = genericGraphLegendPanel('Cluster operators information', 'none').addTarget( + prometheus.target( + 'cluster_operator_conditions{name!="",reason!=""}', + legendFormat='{{name}} - {{reason}}', + ) +); + +local clusterOperatorsDegraded = genericGraphLegendPanel('Cluster operators degraded', 'none').addTarget( + prometheus.target( + 'cluster_operator_conditions{condition="Degraded",name!="",reason!=""}', + legendFormat='{{name}} - {{reason}}', + ) +); + + +// Dashboard + +grafana.dashboard.new( + 'k8s Performance', + description='Performance dashboard for Red Hat k8s', + time_from='now-1h', + timezone='utc', + refresh='30s', + editable='true', +) + + +// Templates + +.addTemplate( + grafana.template.datasource( + 'datasource', + 'prometheus', + '', + ) +) + +.addTemplate( + grafana.template.new( + '_worker_node', + '$datasource', + 'label_values(kube_node_labels{}, exported_node)', + '', + refresh=2, + ) { + label: 'Worker', + type: 'query', 
+ multi: true, + includeAll: false, + }, +) + +.addTemplate( + grafana.template.new( + 'namespace', + '$datasource', + 'label_values(kube_pod_info, exported_namespace)', + '', + refresh=2, + ) { + label: 'Namespace', + type: 'query', + multi: false, + includeAll: true, + }, +) + + +.addTemplate( + grafana.template.new( + 'block_device', + '$datasource', + 'label_values(node_disk_written_bytes_total,device)', + '', + regex='/^(?:(?!dm|rb).)*$/', + refresh=2, + ) { + label: 'Block device', + type: 'query', + multi: true, + includeAll: true, + }, +) + + +.addTemplate( + grafana.template.new( + 'net_device', + '$datasource', + 'label_values(node_network_receive_bytes_total,device)', + '', + regex='/^((br|en|et).*)$/', + refresh=2, + ) { + label: 'Network device', + type: 'query', + multi: true, + includeAll: true, + }, +) + +.addTemplate( + grafana.template.new( + 'interval', + '$datasource', + '$__auto_interval_period', + label='interval', + refresh='time', + ) { + type: 'interval', + query: '2m,3m,4m,5m', + auto: false, + }, +) + +// Dashboard definition + +.addPanel(grafana.row.new(title='Cluster Details', collapse=true).addPanels( + [ + current_node_count { gridPos: { x: 0, y: 4, w: 8, h: 3 } }, + current_namespace_count { gridPos: { x: 8, y: 4, w: 8, h: 3 } }, + current_pod_count { gridPos: { x: 16, y: 4, w: 8, h: 3 } }, + nodeCount { gridPos: { x: 0, y: 12, w: 8, h: 8 } }, + nsCount { gridPos: { x: 8, y: 12, w: 8, h: 8 } }, + podCount { gridPos: { x: 16, y: 12, w: 8, h: 8 } }, + secretCmCount { gridPos: { x: 0, y: 20, w: 8, h: 8 } }, + deployCount { gridPos: { x: 8, y: 20, w: 8, h: 8 } }, + servicesCount { gridPos: { x: 16, y: 20, w: 8, h: 8 } }, + top10ContMem { gridPos: { x: 0, y: 28, w: 24, h: 8 } }, + top10ContCPU { gridPos: { x: 0, y: 36, w: 12, h: 8 } }, + goroutines_count { gridPos: { x: 12, y: 36, w: 12, h: 8 } }, + podDistribution { gridPos: { x: 0, y: 44, w: 24, h: 8 } }, + ] +), { gridPos: { x: 0, y: 3, w: 24, h: 1 } }) + 
+.addPanel(grafana.row.new(title='Node: $_worker_node', collapse=true, repeat='_worker_node').addPanels( + [ + nodeCPU('$_worker_node') { gridPos: { x: 0, y: 0, w: 12, h: 8 } }, + nodeMemory('$_worker_node') { gridPos: { x: 12, y: 0, w: 12, h: 8 } }, + diskThroughput('$_worker_node') { gridPos: { x: 0, y: 8, w: 12, h: 8 } }, + diskIOPS('$_worker_node') { gridPos: { x: 12, y: 8, w: 12, h: 8 } }, + networkUtilization('$_worker_node') { gridPos: { x: 0, y: 16, w: 12, h: 8 } }, + networkPackets('$_worker_node') { gridPos: { x: 12, y: 16, w: 12, h: 8 } }, + networkDrop('$_worker_node') { gridPos: { x: 0, y: 24, w: 12, h: 8 } }, + conntrackStats('$_worker_node') { gridPos: { x: 12, y: 24, w: 12, h: 8 } }, + top10ContainerCPU('$_worker_node') { gridPos: { x: 0, y: 32, w: 12, h: 8 } }, + top10ContainerRSS('$_worker_node') { gridPos: { x: 12, y: 32, w: 12, h: 8 } }, + ], +), { gridPos: { x: 0, y: 1, w: 0, h: 8 } }) diff --git a/templates/General/kube-burner.jsonnet b/templates/General/kube-burner.jsonnet new file mode 100644 index 0000000..cdb5160 --- /dev/null +++ b/templates/General/kube-burner.jsonnet @@ -0,0 +1,4568 @@ +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; +local es = grafana.elasticsearch; + +local worker_count = grafana.statPanel.new( + title='Node count', + datasource='$datasource1', + justifyMode='center' +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "nodeRoles"', + timeField='timestamp', + metrics=[{ + field: 'count', + id: '1', + meta: {}, + settings: {}, + type: 'count', + }], + bucketAggs=[ + { + field: 'labels.role.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +).addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, +]); + 
+ +local metric_count_panel = grafana.statPanel.new( + datasource='$datasource1', + justifyMode='center', + title=null +).addTarget( + // Namespaces count + es.target( + query='uuid.keyword: $uuid AND metricName: "namespaceCount" AND labels.phase: "Active"', + alias='Namespaces', + timeField='timestamp', + metrics=[{ + field: 'value', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +).addTarget( + // Services count + es.target( + query='uuid.keyword: $uuid AND metricName: "serviceCount"', + alias='Services', + timeField='timestamp', + metrics=[{ + field: 'value', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +).addTarget( + // Deployments count + es.target( + query='uuid.keyword: $uuid AND metricName: "deploymentCount"', + alias='Deployments', + timeField='timestamp', + metrics=[{ + field: 'value', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +).addTarget( + // Secrets count + es.target( + query='uuid.keyword: $uuid AND metricName: "secretCount"', + alias='Secrets', + timeField='timestamp', + metrics=[{ + field: 'value', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +).addTarget( + // ConfigMap count + es.target( + query='uuid.keyword: $uuid AND metricName: "configmapCount"', + alias='ConfigMaps', + timeField='timestamp', + metrics=[{ + field: 
'value', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +).addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, +]); + +local openshift_version_panel = grafana.statPanel.new( + title='OpenShift version', + datasource='$datasource1', + justifyMode='center', + reducerFunction='lastNotNull', + fields='/^labels\\.version$/' +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "clusterVersion"', + timeField='timestamp', + metrics=[{ + id: '1', + settings: { + size: '500', + }, + type: 'raw_data', + }], + ) +); + +local etcd_version_panel = grafana.statPanel.new( + title='Etcd version', + datasource='$datasource1', + justifyMode='center', + reducerFunction='lastNotNull', + fields='labels.cluster_version' +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "etcdVersion"', + timeField='timestamp', + metrics=[{ + id: '1', + settings: { + size: '500', + }, + type: 'raw_data', + }], + ) +); + + +// Next line +// TODO: Convert to new table format once jsonnet supports it. +// That would fix the text wrapping problem. 
+local summary_panel_1 = grafana.tablePanel.new( + datasource='$datasource1', + title=null, + styles=[ + { + pattern: 'uuid', + alias: 'UUID', + type: 'string', + }, + { + pattern: 'jobConfig.name', + alias: 'Name', + type: 'hidden', + }, + { + pattern: 'jobConfig.qps', + alias: 'QPS', + type: 'number', + }, + { + pattern: 'jobConfig.burst', + alias: 'Burst', + type: 'number', + }, + { + pattern: 'elapsedTime', + alias: 'Elapsed time', + type: 'number', + unit: 's', + }, + { + pattern: 'jobConfig.jobIterations', + alias: 'Iterations', + type: 'number', + }, + { + pattern: 'jobConfig.jobType', + alias: 'Job Type', + type: 'string', + }, + { + pattern: 'jobConfig.podWait', + alias: 'podWait', + type: 'hidden', + }, + { + pattern: 'jobConfig.namespacedIterations', + alias: 'Namespaced iterations', + type: 'hidden', + }, + { + pattern: 'jobConfig.preLoadImages', + alias: 'Preload Images', + type: 'boolean', + }, + { + pattern: '_id', + alias: '_id', + type: 'hidden', + }, + { + pattern: '_index', + alias: '_index', + type: 'hidden', + }, + { + pattern: '_type', + alias: '_type', + type: 'hidden', + }, + { + pattern: 'highlight', + alias: 'highlight', + type: 'hidden', + }, + { + pattern: '_type', + alias: '_type', + type: 'hidden', + }, + { + pattern: 'jobConfig.cleanup', + type: 'hidden', + }, + { + pattern: 'jobConfig.errorOnVerify', + alias: 'errorOnVerify', + type: 'hidden', + }, + { + pattern: 'jobConfig.jobIterationDelay', + alias: 'jobIterationDelay', + type: 'hidden', + unit: 's', + }, + { + pattern: 'jobConfig.jobPause', + alias: 'jobPause', + type: 'hidden', + unit: 's', + }, + { + pattern: 'jobConfig.maxWaitTimeout', + alias: 'maxWaitTimeout', + type: 'hidden', + unit: 's', + }, + { + pattern: 'jobConfig.namespace', + alias: 'namespacePrefix', + type: 'hidden', + }, + { + pattern: 'jobConfig.namespaced', + alias: 'jobConfig.namespaced', + type: 'hidden', + }, + { + pattern: 'jobConfig.objects', + alias: 'jobConfig.objects', + type: 'hidden', + }, + { + 
pattern: 'jobConfig.preLoadPeriod', + alias: 'jobConfig.preLoadPeriod', + type: 'hidden', + }, + { + pattern: 'jobConfig.verifyObjects', + alias: 'jobConfig.verifyObjects', + type: 'hidden', + }, + { + pattern: 'metricName', + alias: 'metricName', + type: 'hidden', + }, + { + pattern: 'timestamp', + alias: 'timestamp', + type: 'hidden', + }, + { + pattern: 'jobConfig.waitFor', + alias: 'jobConfig.waitFor', + type: 'hidden', + }, + { + pattern: 'jobConfig.waitForDeletion', + alias: 'jobConfig.waitForDeletion', + type: 'hidden', + }, + { + pattern: 'jobConfig.waitWhenFinished', + alias: 'jobConfig.waitWhenFinished', + type: 'hidden', + }, + { + pattern: 'sort', + alias: 'sort', + type: 'hidden', + }, + ] +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "jobSummary"', + timeField='timestamp', + metrics=[{ + id: '1', + settings: { + size: '500', + }, + type: 'raw_data', + }], + ) +).addTransformation( + grafana.transformation.new('organize', options={ + indexByName: { + _id: 1, + _index: 2, + _type: 3, + elapsedTime: 8, + 'jobConfig.burst': 7, + 'jobConfig.cleanup': 12, + 'jobConfig.errorOnVerify': 13, + 'jobConfig.jobIterationDelay': 14, + 'jobConfig.jobIterations': 9, + 'jobConfig.jobPause': 15, + 'jobConfig.jobType': 10, + 'jobConfig.maxWaitTimeout': 16, + 'jobConfig.name': 5, + 'jobConfig.namespace': 17, + 'jobConfig.namespacedIterations': 18, + 'jobConfig.objects': 19, + 'jobConfig.podWait': 11, + 'jobConfig.qps': 6, + 'jobConfig.verifyObjects': 20, + 'jobConfig.waitFor': 21, + 'jobConfig.waitForDeletion': 22, + 'jobConfig.waitWhenFinished': 23, + metricName: 24, + timestamp: 0, + uuid: 4, + }, + }) +); + + +// TODO: Convert to new table format once jsonnet supports it. +// That would fix the text wrapping problem. 
+local summary_panel_2 = grafana.tablePanel.new( + datasource='$datasource1', + title=null, + styles=[ + { + pattern: 'k8s_version', + alias: 'k8s version', + type: 'string', + }, + { + pattern: 'result', + alias: 'Result', + type: 'string', + }, + { + pattern: 'sdn_type', + alias: 'SDN', + type: 'string', + }, + { + pattern: 'total_nodes', + alias: 'Total nodes', + type: 'number', + }, + { + pattern: 'master_nodes_count', + alias: 'Master nodes', + type: 'number', + }, + { + pattern: 'worker_nodes_count', + alias: 'Worker nodes', + type: 'number', + }, + { + pattern: 'infra_nodes_count', + alias: 'Infra nodes', + type: 'number', + }, + { + pattern: 'master_nodes_type', + alias: 'Masters flavor', + type: 'string', + }, + { + pattern: '_id', + alias: '_id', + type: 'hidden', + }, + { + pattern: '_index', + alias: '_index', + type: 'hidden', + }, + { + pattern: '_type', + alias: '_type', + type: 'hidden', + }, + { + pattern: 'benchmark', + alias: 'benchmark', + type: 'hidden', + }, + { + pattern: 'clustertype', + alias: 'clustertype', + type: 'hidden', + }, + { + pattern: 'end_date', + alias: 'end_date', + type: 'hidden', + }, + { + pattern: 'highlight', + alias: 'highlight', + type: 'hidden', + }, + { + pattern: 'jobConfig.cleanup', + alias: 'jobConfig.cleanup', + type: 'hidden', + }, + { + pattern: 'jobConfig.errorOnVerify', + alias: 'errorOnVerify', + type: 'hidden', + }, + { + pattern: 'jobConfig.jobIterationDelay', + alias: 'jobIterationDelay', + type: 'hidden', + unit: 's', + }, + { + pattern: 'jobConfig.jobPause', + alias: 'jobPause', + type: 'hidden', + unit: 's', + }, + { + pattern: 'jobConfig.maxWaitTimeout', + alias: 'maxWaitTimeout', + type: 'hidden', + unit: 's', + }, + { + pattern: 'jobConfig.namespace', + alias: 'namespacePrefix', + type: 'hidden', + }, + { + pattern: 'jobConfig.namespaced', + alias: 'jobConfig.namespaced', + type: 'hidden', + }, + { + pattern: 'jobConfig.objects', + alias: 'jobConfig.objects', + type: 'hidden', + }, + { + pattern: 
'jobConfig.preLoadPeriod', + alias: 'jobConfig.preLoadPeriod', + type: 'hidden', + }, + { + pattern: 'jobConfig.verifyObjects', + alias: 'jobConfig.verifyObjects', + type: 'hidden', + }, + { + pattern: 'jobConfig.waitFor', + alias: 'jobConfig.waitFor', + type: 'hidden', + }, + { + pattern: 'jobConfig.waitForDeletion', + alias: 'jobConfig.waitForDeletion', + type: 'hidden', + }, + { + pattern: 'jobConfig.waitWhenFinished', + alias: 'jobConfig.waitWhenFinished', + type: 'hidden', + }, + { + pattern: 'metricName', + alias: 'metricName', + type: 'hidden', + }, + { + pattern: 'ocp_version', + alias: 'ocp_version', + type: 'hidden', + }, + { + pattern: 'ocp_version', + alias: 'ocp_version', + type: 'hidden', + }, + { + pattern: 'sort', + alias: 'sort', + type: 'hidden', + }, + { + pattern: 'timestamp', + alias: 'timestamp', + type: 'hidden', + }, + { + pattern: 'uuid', + alias: 'uuid', + type: 'hidden', + }, + { + pattern: 'workload', + alias: 'workload', + type: 'hidden', + }, + { + pattern: 'worker_nodes_type', + alias: 'worker_nodes_type', + type: 'hidden', + }, + { + pattern: 'infra_nodes_type', + alias: 'infra_nodes_type', + type: 'hidden', + }, + { + pattern: 'platform', + alias: 'platform', + type: 'hidden', + }, + { + pattern: 'workload_nodes_count', + alias: 'workload_nodes_count', + type: 'hidden', + }, + { + pattern: 'workload_nodes_type', + alias: 'workload_nodes_type', + type: 'hidden', + }, + ] +).addTarget( + es.target( + query='uuid.keyword: $uuid AND result.keyword: *', + timeField='timestamp', + metrics=[{ + id: '1', + settings: { + size: '500', + }, + type: 'raw_data', + }], + ) +).addTransformation( + grafana.transformation.new('organize', options={ + indexByName: { + _id: 4, + _index: 5, + _type: 15, + benchmark: 17, + clustertype: 18, + end_date: 19, + highlight: 20, + infra_nodes_count: 9, + infra_nodes_type: 14, + k8s_version: 1, + master_nodes_count: 7, + master_nodes_type: 11, + ocp_version: 21, + platform: 22, + result: 2, + sdn_type: 3, + 
sort: 23, + timestamp: 0, + total_nodes: 6, + uuid: 16, + worker_nodes_count: 8, + worker_nodes_type: 12, + workload: 24, + workload_nodes_count: 10, + workload_nodes_type: 13, + }, + }) +); + +// First row: Cluster status +local masters_cpu = grafana.graphPanel.new( + title='Masters CPU utilization', + datasource='$datasource1', + legend_alignAsTable=true, + legend_avg=true, + legend_max=true, + percentage=true, + legend_values=true, + format='percent', +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Masters" AND NOT labels.mode.keyword: idle AND NOT labels.mode.keyword: steal', + timeField='timestamp', + alias='{{labels.instance.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: { + script: '_value * 100', + }, + type: 'sum', + }], + bucketAggs=[ + { + field: 'labels.instance.keyword', + fake: true, + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + +local masters_memory = grafana.graphPanel.new( + title='Masters Memory utilization', + datasource='$datasource1', + legend_alignAsTable=true, + legend_avg=true, + legend_max=true, + legend_values=true, + format='bytes' +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Masters"', + timeField='timestamp', + alias='Available {{labels.instance.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'sum', + }], + bucketAggs=[ + { + field: 'labels.instance.keyword', + fake: true, + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + +local 
node_status_summary = grafana.graphPanel.new( + title='Node Status Summary', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_current=true, + legend_values=true, + legend_rightSide=true, +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeStatus"', + timeField='timestamp', + alias='{{labels.condition.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.condition.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local pod_status_summary = grafana.graphPanel.new( + title='Pod Status Summary', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_current=true, + legend_values=true, +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "podStatusCount"', + timeField='timestamp', + alias='{{labels.phase.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.phase.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local kube_api_cpu = grafana.graphPanel.new( + title='Kube-apiserver CPU', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-apiserver', + timeField='timestamp', + 
alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-apiserver', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-apiserver', + timeField='timestamp', + 
alias='Avg CPU {{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); +// TODO: When the feature is added to grafannet, style the average differently. + + +local kube_api_memory = grafana.graphPanel.new( + title='Kube-apiserver Memory', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-apiserver', + timeField='timestamp', + alias='Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +).addTarget( + es.target( + query='uuid.keyword: $uuid AND 
metricName: "containerMemory-Masters" AND labels.container.keyword: kube-apiserver', + timeField='timestamp', + alias='Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-apiserver', + timeField='timestamp', + alias='Avg Rss {{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); +// TODO: When the feature is added to grafannet, style the average differently. 
+ + +local active_controller_manager_cpu = grafana.graphPanel.new( + title='Active Kube-controller-manager CPU', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-controller-manager', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '1', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-controller-manager', + timeField='timestamp', + alias='{{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '1', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + 
min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local active_controller_manager_memory = grafana.graphPanel.new( + title='Active Kube-controller-manager memory', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-controller-manager', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '1', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory-Masters" AND labels.container.keyword: kube-controller-manager', + timeField='timestamp', + alias='{{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '1', + }, + type: 'terms', + }, + { + 
field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local kube_scheduler_cpu = grafana.graphPanel.new( + title='Kube-scheduler CPU', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-scheduler', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-scheduler', + timeField='timestamp', + alias='{{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + 
bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local kube_scheduler_memory = grafana.graphPanel.new( + title='Kube-scheduler memory', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-scheduler', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory-Masters" AND 
labels.container.keyword: kube-scheduler', + timeField='timestamp', + alias='Rss {{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local hypershift_controlplane_cpu = grafana.graphPanel.new( + title='Hypershift Controlplane CPU Usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU-Controlplane"', + timeField='timestamp', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '20', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '20', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '4', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + + +local hypershift_controlplane_memory = grafana.graphPanel.new( + title='Hypershift Controlplane RSS memory Usage', + datasource='$datasource1', + 
legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory-Controlplane"', + timeField='timestamp', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '20', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '20', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '4', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + +// Pod latencies section +local average_pod_latency = grafana.graphPanel.new( + title='Average pod latency', + datasource='$datasource1', + legend_alignAsTable=true, + legend_min=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='ms', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement', + timeField='timestamp', + alias='{{field}}', + metrics=[ + { + field: 'podReadyLatency', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }, + { + field: 'schedulingLatency', + id: '3', + meta: {}, + settings: {}, + type: 'avg', + }, + { + field: 'initializedLatency', + id: '4', + meta: {}, + settings: {}, + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + +local pod_latencies_summary = grafana.statPanel.new( + datasource='$datasource1', + justifyMode='center', + title='Pod latencies summary $latencyPercentile', + unit='ms', + colorMode='value', // Note: There isn't currently a way to set the color palette. 
+).addTarget( + // Namespaces count + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: podLatencyQuantilesMeasurement', + alias='$latencyPercentile {{term quantileName.keyword}}', + timeField='timestamp', + metrics=[{ + field: '$latencyPercentile', + id: '1', + meta: {}, + settings: {}, + type: 'max', + }], + bucketAggs=[ + { + fake: true, + field: 'quantileName.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '0', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + +local pod_conditions_latency = grafana.tablePanel.new( + title='Pod conditions latency', + datasource='$datasource1', + transform='table', + styles=[ + { + pattern: 'Average containersReadyLatency', + alias: 'ContainersReady', + type: 'number', + unit: 'ms', + }, + { + pattern: 'Average initializedLatency', + alias: 'Initialized', + type: 'number', + unit: 'ms', + }, + { + pattern: 'Average podReadyLatency', + alias: 'Ready', + type: 'number', + unit: 'ms', + }, + { + pattern: 'Average schedulingLatency', + alias: 'Scheduling', + type: 'number', + unit: 'ms', + }, + { + pattern: 'namespace.keyword', + alias: 'Namespace', + type: 'string', + }, + { + pattern: 'podName.keyword', + alias: 'Pod', + type: 'string', + }, + { + pattern: 'nodeName.keyword', + alias: 'Node', + type: 'string', + }, + ], +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement', + timeField='timestamp', + metrics=[ + { + field: 'schedulingLatency', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }, + { + field: 'initializedLatency', + id: '3', + meta: {}, + settings: {}, + type: 'avg', + }, + { + field: 'containersReadyLatency', + id: '4', + meta: {}, + settings: {}, + type: 'avg', + }, + { + field: 'podReadyLatency', + id: '5', + meta: {}, + settings: {}, + type: 'avg', + }, + 
], + bucketAggs=[ + { + fake: true, + field: 'namespace.keyword', + id: '6', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '5', + size: '100', + }, + type: 'terms', + }, + { + fake: true, + field: 'nodeName.keyword', + id: '7', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '100', + }, + type: 'terms', + }, + { + field: 'podName.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '5', + size: '100', + }, + type: 'terms', + }, + ], + ) +); + +local setup_latency = grafana.graphPanel.new( + title='Top 10 Container runtime network setup latency', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='µs', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: containerNetworkSetupLatency', + timeField='timestamp', + alias='{{labels.node.keyword}}', + metrics=[ + { + field: 'value', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.node.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local scheduling_throughput = grafana.graphPanel.new( + title='Scheduling throughput', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='reqps', +) + .addTarget( + es.target( + query='uuid: $uuid AND metricName.keyword: schedulingThroughput', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + 
) +); + +// OVN section +local ovnkube_master_cpu = grafana.graphPanel.new( + title='ovnkube-master CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.pod.keyword: /ovnkube-master.*/', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '3', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + + +local ovnkube_master_memory = grafana.graphPanel.new( + title='ovnkube-master Memory usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.pod.keyword: /ovnkube-master.*/', + timeField='timestamp', + alias='{{labels.pod.keyword}}', + metrics=[ + { + field: 'value', + id: '1', + type: 'sum', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '3', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + +local ovnkube_controller_cpu = grafana.graphPanel.new( + title='ovn-controller CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, 
+ legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.namespace.keyword: "openshift-ovn-kubernetes" AND labels.pod.keyword: /ovnkube-node.*/ AND labels.container.keyword: "ovn-controller"', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '3', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + + +local ovnkube_controller_memory = grafana.graphPanel.new( + title='ovn-controller Memory usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.namespace.keyword: "openshift-ovn-kubernetes" AND labels.pod.keyword: /ovnkube-node.*/ AND labels.container.keyword: "ovn-controller"', + timeField='timestamp', + alias='{{labels.pod.keyword}}', + metrics=[ + { + field: 'value', + id: '1', + type: 'sum', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '3', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + + +// ETCD section +local etcd_fsync_latency = grafana.graphPanel.new( + title='etcd 99th disk WAL fsync latency', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( 
+ query='uuid.keyword: $uuid AND metricName: "99thEtcdDiskWalFsyncDurationSeconds"', + timeField='timestamp', + alias='{{labels.pod.keyword}}', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local etcd_commit_latency = grafana.graphPanel.new( + title='etcd 99th disk backend commit latency', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "99thEtcdDiskBackendCommitDurationSeconds"', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local etcd_leader_changes = grafana.graphPanel.new( + title='Etcd leader changes', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_values=true, + min=0, + format='s', +) + .addTarget( + es.target( + query='uuid: $uuid AND metricName.keyword: etcdLeaderChangesRate', + alias='Etcd leader changes', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '1', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local etcd_peer_roundtrip_time = 
grafana.graphPanel.new( + title='Etcd 99th network peer roundtrip time', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: 99thEtcdRoundTripTimeSeconds', + alias='{{labels.pod.keyword}} to {{labels.To.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + fake: true, + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + fake: true, + field: 'labels.To.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local etcd_cpu = grafana.graphPanel.new( + title='Etcd CPU utilization', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: etcd', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + fake: true, + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 
'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local etcd_memory = grafana.graphPanel.new( + title='Etcd memory utilization', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: etcd', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + fake: true, + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +// API an Kubeproxy section + +local api_latency_read_only_resource = grafana.graphPanel.new( + title='Read Only API request P99 latency - resource scoped', + datasource='$datasource1', + legend_alignAsTable=true, + format='s', + legend_max=true, + legend_avg=true, + legend_values=true, +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: resource', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.verb.keyword', + id: '3', + settings: { + min_doc_count: 0, + 
order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'labels.resource.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local api_latency_read_only_namespace = grafana.graphPanel.new( + title='Read Only API request P99 latency - namespace scoped', + datasource='$datasource1', + legend_alignAsTable=true, + format='s', + legend_max=true, + legend_avg=true, + legend_values=true, +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: namespace', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.verb.keyword', + id: '3', + settings: { + min_doc_count: 0, + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local api_latency_read_only_cluster = grafana.graphPanel.new( + title='Read Only API request P99 latency - cluster scoped', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: cluster', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.verb.keyword', + id: '3', + settings: { + min_doc_count: 0, + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', 
+ min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local api_latency_mutating = grafana.graphPanel.new( + title='Mutating API request P99 latency', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: mutatingAPICallsLatency', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.verb.keyword', + id: '3', + settings: { + min_doc_count: 0, + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local api_request_rate = grafana.graphPanel.new( + title='API request rate', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: APIRequestRate', + alias='{{labels.verb.keyword}} {{labels.resource.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.resource.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + fake: true, + field: 'labels.verb.keyword', + id: '3', + settings: { + min_doc_count: 0, + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local service_sync_latency = grafana.graphPanel.new( + title='Service sync latency', + datasource='$datasource1', + 
legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( + query='uuid: $uuid AND metricName.keyword: kubeproxyP99ProgrammingLatency', + alias='Latency', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.instance.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid: $uuid AND metricName.keyword: serviceSyncLatency', + alias='Latency', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +// Cluster Kubelet & CRI-O section +local kubelet_process_cpu = grafana.graphPanel.new( + title='Kubelet process CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: kubeletCPU', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.node.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local kubelet_process_memory = grafana.graphPanel.new( + title='Kubelet process RSS memory usage', + datasource='$datasource1', + 
legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: kubeletMemory', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.node.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local cri_o_process_cpu = grafana.graphPanel.new( + title='CRI-O process CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: crioCPU', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.node.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local cri_o_process_memory = grafana.graphPanel.new( + title='CRI-O RSS memory usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: crioMemory', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.node.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: 
'1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +// Master Node section + +local container_cpu_master = grafana.graphPanel.new( + title='Container CPU usage $master', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $master AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local container_memory_master = grafana.graphPanel.new( + title='Container RSS memory $master', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $master AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: 
'1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local cpu_master = grafana.graphPanel.new( + title='CPU $master', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_min=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Masters" AND labels.instance.keyword: $master', + alias='{{labels.mode.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + settings: { + script: { + inline: '_value*100', + }, + }, + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.mode.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local memory_master = grafana.graphPanel.new( + title='Memory $master', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Masters" AND labels.instance.keyword: $master', + alias='Available', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + 
], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Masters" AND labels.instance.keyword: $master', + alias='Total', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Masters" AND labels.instance.keyword: $master', + alias='Utilization', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +// Worker Node section + +local container_cpu_worker = grafana.graphPanel.new( + title='Container CPU usage $worker', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $worker AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local 
container_memory_worker = grafana.graphPanel.new( + title='Container RSS memory $worker', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $worker AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local cpu_worker = grafana.graphPanel.new( + title='CPU $worker', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_min=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Workers" AND labels.instance.keyword: $worker', + alias='{{labels.mode.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + settings: { + script: { + inline: '_value*100', + }, + }, + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.mode.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + 
+local memory_worker = grafana.graphPanel.new( + title='Memory $worker', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Workers" AND labels.instance.keyword: $worker', + alias='Available', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Workers" AND labels.instance.keyword: $worker', + alias='Total', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Workers" AND labels.instance.keyword: $worker', + alias='Utilization', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +// Infra Node section + +local container_cpu_infra = grafana.graphPanel.new( + title='Container CPU usage $infra', + datasource='$datasource1', + legend_alignAsTable=true, + legend_avg=true, + legend_max=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $infra AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} 
{{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local container_memory_infra = grafana.graphPanel.new( + title='Container RSS memory $infra', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $infra AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local cpu_infra = grafana.graphPanel.new( + title='CPU $infra', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_min=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + 
query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Infra" AND labels.instance.keyword: $infra', + alias='{{labels.mode.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + settings: { + script: { + inline: '_value*100', + }, + }, + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.mode.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local memory_infra = grafana.graphPanel.new( + title='Memory $infra', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Infra" AND labels.instance.keyword: $infra', + alias='Available', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Infra" AND labels.instance.keyword: $infra', + alias='Total', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Infra" AND labels.instance.keyword: $infra', + alias='Utilization', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], 
+ bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +// Aggregated worker node usage section +local agg_avg_cpu = grafana.graphPanel.new( + title='Avg CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_avg=true, + legend_max=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-AggregatedWorkers"', + alias='{{labels.mode.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + settings: { + script: { + inline: '_value*100', + }, + }, + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.mode.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local agg_avg_mem = grafana.graphPanel.new( + title='Avg Memory', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-AggregatedWorkers"', + alias='Available', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-AggregatedWorkers"', + alias='Total', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 
'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local agg_container_cpu = grafana.graphPanel.new( + title='Container CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "containerCPU-AggregatedWorkers" AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}}: {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local agg_container_mem = grafana.graphPanel.new( + title='Container memory RSS', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "containerMemory-AggregatedWorkers" AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}}: {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + fake: true, + field: 'labels.container.keyword', + id: '3', + settings: 
{ + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +//Dashboard & Templates + +grafana.dashboard.new( + 'Kube-burner report v2', + description='', + editable='true', + time_from='now/y', + time_to='now', + timezone='utc', +) +.addTemplate( + grafana.template.datasource( + 'datasource1', + 'elasticsearch', + 'AWS Dev - ripsaw-kube-burner', + label='Datasource', + regex='/.*kube-burner.*/' + ) +) +.addTemplate( + grafana.template.new( + label='Platform', + name='platform', + current='All', + query='{"find": "terms", "field": "platform.keyword"}', + refresh=2, + multi=true, + includeAll=true, + datasource='$datasource1', + ) +) +.addTemplate( + grafana.template.new( + label='SDN type', + name='sdn', + current='All', + query='{"find": "terms", "field": "sdn_type.keyword"}', + refresh=2, + multi=true, + includeAll=true, + datasource='$datasource1', + ) +) +.addTemplate( + grafana.template.new( + label='Workload', + multi=true, + query='{"find": "terms", "field": "workload.keyword", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn"}', + refresh=1, + name='workload', + includeAll=false, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='Worker count', + multi=true, + query='{"find": "terms", "field": "worker_nodes_count", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn AND workload.keyword: $workload"}', + refresh=1, + name='worker_count', + includeAll=true, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='UUID', + multi=false, + query='{"find": "terms", "field": "uuid.keyword", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn AND workload.keyword: $workload AND worker_nodes_count: $worker_count"}', + refresh=2, + name='uuid', + includeAll=false, + 
datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='Master nodes', + multi=true, + query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: master AND uuid.keyword: $uuid"}', + refresh=2, + name='master', + includeAll=false, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='Worker nodes', + multi=true, + query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: worker AND uuid.keyword: $uuid"}', + refresh=2, + name='worker', + includeAll=false, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='Infra nodes', + multi=true, + query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: infra AND uuid.keyword: $uuid"}', + refresh=2, + name='infra', + includeAll=false, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='Namespace', + multi=true, + query='{ "find" : "terms", "field": "labels.namespace.keyword", "query": "labels.namespace.keyword: /openshift-.*/ AND uuid.keyword: $uuid"}', + refresh=2, + name='namespace', + includeAll=true, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.custom( + label='Latency percentile', + name='latencyPercentile', + current='P99', + query='P99, P95, P50', + multi=false, + includeAll=false, + ) +) +.addPanels( + [ + worker_count { gridPos: { x: 0, y: 0, w: 4, h: 3 } }, + metric_count_panel { gridPos: { x: 4, y: 0, w: 12, h: 3 } }, + openshift_version_panel { gridPos: { x: 16, y: 0, w: 6, h: 3 } }, + etcd_version_panel { gridPos: { x: 22, y: 0, w: 2, h: 3 } }, + summary_panel_1 { gridPos: { x: 0, y: 3, h: 2, w: 24 } }, + summary_panel_2 { gridPos: { x: 0, y: 5, h: 2, w: 24 } }, + ], +) +.addPanel( + grafana.row.new(title='Cluster status', collapse=true).addPanels( + [ + masters_cpu { gridPos: { x: 
0, y: 8, w: 12, h: 9 } }, + masters_memory { gridPos: { x: 12, y: 8, w: 12, h: 9 } }, + node_status_summary { gridPos: { x: 0, y: 17, w: 12, h: 8 } }, + pod_status_summary { gridPos: { x: 12, y: 17, w: 12, h: 8 } }, + kube_api_cpu { gridPos: { x: 0, y: 25, w: 12, h: 9 } }, + kube_api_memory { gridPos: { x: 12, y: 25, w: 12, h: 9 } }, + active_controller_manager_cpu { gridPos: { x: 0, y: 34, w: 12, h: 9 } }, + active_controller_manager_memory { gridPos: { x: 12, y: 34, w: 12, h: 9 } }, + kube_scheduler_cpu { gridPos: { x: 0, y: 43, w: 12, h: 9 } }, + kube_scheduler_memory { gridPos: { x: 12, y: 43, w: 12, h: 9 } }, + hypershift_controlplane_cpu { gridPos: { x: 0, y: 52, w: 12, h: 9 } }, + hypershift_controlplane_memory { gridPos: { x: 12, y: 52, w: 12, h: 9 } }, + ] + ), { x: 0, y: 7, w: 24, h: 1 } +) +.addPanel( + // Panels below for uncollapsed row. + grafana.row.new(title='Pod latency stats', collapse=false), { x: 0, y: 8, w: 24, h: 1 } +) +.addPanels( + [ + average_pod_latency { gridPos: { x: 0, y: 9, w: 12, h: 8 } }, + pod_latencies_summary { gridPos: { x: 12, y: 9, w: 12, h: 8 } }, + pod_conditions_latency { gridPos: { x: 0, y: 17, w: 24, h: 10 } }, + setup_latency { gridPos: { x: 0, y: 27, w: 12, h: 9 } }, + scheduling_throughput { gridPos: { x: 12, y: 27, w: 12, h: 9 } }, + ] +) +.addPanel( + grafana.row.new(title='OVNKubernetes', collapse=true).addPanels( + [ + ovnkube_master_cpu { gridPos: { x: 0, y: 80, w: 12, h: 8 } }, + ovnkube_master_memory { gridPos: { x: 12, y: 80, w: 12, h: 8 } }, + ovnkube_controller_cpu { gridPos: { x: 0, y: 88, w: 12, h: 8 } }, + ovnkube_controller_memory { gridPos: { x: 12, y: 88, w: 12, h: 8 } }, + ] + ), { x: 0, y: 36, w: 24, h: 1 } +) +.addPanel( + grafana.row.new(title='etcd', collapse=false), { x: 0, y: 37, w: 24, h: 1 } +) +.addPanels( + [ + etcd_fsync_latency { gridPos: { x: 0, y: 38, w: 12, h: 9 } }, + etcd_commit_latency { gridPos: { x: 12, y: 38, w: 12, h: 9 } }, + etcd_leader_changes { gridPos: { x: 0, y: 47, w: 12, 
h: 9 } }, + etcd_peer_roundtrip_time { gridPos: { x: 12, y: 47, w: 12, h: 9 } }, + etcd_cpu { gridPos: { x: 0, y: 56, w: 12, h: 9 } }, + etcd_memory { gridPos: { x: 12, y: 56, w: 12, h: 9 } }, + ], +) +.addPanel( + grafana.row.new(title='API and Kubeproxy', collapse=false), { x: 0, y: 65, w: 24, h: 1 } +) +.addPanels( + [ + api_latency_read_only_resource { gridPos: { x: 0, y: 66, w: 12, h: 9 } }, + api_latency_read_only_namespace { gridPos: { x: 12, y: 66, w: 12, h: 9 } }, + api_latency_read_only_cluster { gridPos: { x: 0, y: 75, w: 12, h: 9 } }, + api_latency_mutating { gridPos: { x: 12, y: 75, w: 12, h: 9 } }, + api_request_rate { gridPos: { x: 0, y: 84, w: 12, h: 9 } }, + service_sync_latency { gridPos: { x: 12, y: 84, w: 12, h: 9 } }, + ], +) + +.addPanel( + grafana.row.new(title='Cluster Kubelet & CRI-O', collapse=false), { x: 0, y: 93, w: 24, h: 1 } +) +.addPanels( + [ + kubelet_process_cpu { gridPos: { x: 0, y: 94, w: 12, h: 8 } }, + kubelet_process_memory { gridPos: { x: 12, y: 94, w: 12, h: 8 } }, + cri_o_process_cpu { gridPos: { x: 0, y: 103, w: 12, h: 8 } }, + cri_o_process_memory { gridPos: { x: 12, y: 103, w: 12, h: 8 } }, + ], +) + +.addPanel( + grafana.row.new(title='Master: $master', collapse=true, repeat='$master').addPanels( + [ + container_cpu_master { gridPos: { x: 0, y: 112, w: 12, h: 9 } }, + container_memory_master { gridPos: { x: 12, y: 112, w: 12, h: 9 } }, + cpu_master { gridPos: { x: 0, y: 121, w: 12, h: 9 } }, + memory_master { gridPos: { x: 12, y: 121, w: 12, h: 9 } }, + ] + ), { x: 0, y: 111, w: 24, h: 1 } +) + +.addPanel( + grafana.row.new(title='Worker: $worker', collapse=true, repeat='$worker').addPanels( + [ + container_cpu_worker { gridPos: { x: 0, y: 112, w: 12, h: 9 } }, + container_memory_worker { gridPos: { x: 12, y: 112, w: 12, h: 9 } }, + cpu_worker { gridPos: { x: 0, y: 121, w: 12, h: 9 } }, + memory_worker { gridPos: { x: 12, y: 121, w: 12, h: 9 } }, + ] + ), { x: 0, y: 111, w: 24, h: 1 } +) + +.addPanel( + 
grafana.row.new(title='Infra: $infra', collapse=true, repeat='$infra').addPanels( + [ + container_cpu_infra { gridPos: { x: 0, y: 131, w: 12, h: 9 } }, + container_memory_infra { gridPos: { x: 12, y: 131, w: 12, h: 9 } }, + cpu_infra { gridPos: { x: 0, y: 140, w: 12, h: 9 } }, + memory_infra { gridPos: { x: 12, y: 140, w: 12, h: 9 } }, + ] + ), { x: 0, y: 130, w: 24, h: 1 } +) + +.addPanel( + grafana.row.new(title='Aggregated worker nodes usage (only in aggregated metrics profile)', collapse=true).addPanels( + [ + agg_avg_cpu { gridPos: { x: 0, y: 150, w: 12, h: 9 } }, + agg_avg_mem { gridPos: { x: 12, y: 150, w: 12, h: 9 } }, + agg_container_cpu { gridPos: { x: 0, y: 159, w: 12, h: 9 } }, + agg_container_mem { gridPos: { x: 12, y: 159, w: 12, h: 9 } }, + ] + ), { x: 0, y: 149, w: 24, h: 1 } +) diff --git a/templates/ocp-performance.jsonnet b/templates/General/ocp-performance.jsonnet similarity index 99% rename from templates/ocp-performance.jsonnet rename to templates/General/ocp-performance.jsonnet index bd9c7b3..49a7a42 100644 --- a/templates/ocp-performance.jsonnet +++ b/templates/General/ocp-performance.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; diff --git a/templates/ovn-dashboard.jsonnet b/templates/General/ovn-dashboard.jsonnet similarity index 99% rename from templates/ovn-dashboard.jsonnet rename to templates/General/ovn-dashboard.jsonnet index d9abada..2d1a3db 100644 --- a/templates/ovn-dashboard.jsonnet +++ b/templates/General/ovn-dashboard.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; local stat = grafana.statPanel; diff --git a/templates/pgbench-dashboard.jsonnet b/templates/General/pgbench-dashboard.jsonnet similarity index 98% rename 
from templates/pgbench-dashboard.jsonnet rename to templates/General/pgbench-dashboard.jsonnet index bd7d7c5..1f39d0f 100644 --- a/templates/pgbench-dashboard.jsonnet +++ b/templates/General/pgbench-dashboard.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local es = grafana.elasticsearch; local tps_report = grafana.graphPanel.new( diff --git a/templates/uperf-perf.jsonnet b/templates/General/uperf-perf.jsonnet similarity index 99% rename from templates/uperf-perf.jsonnet rename to templates/General/uperf-perf.jsonnet index 1ecd3b4..d70b3ab 100644 --- a/templates/uperf-perf.jsonnet +++ b/templates/General/uperf-perf.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local es = grafana.elasticsearch; diff --git a/templates/vegeta-wrapper.jsonnet b/templates/General/vegeta-wrapper.jsonnet similarity index 98% rename from templates/vegeta-wrapper.jsonnet rename to templates/General/vegeta-wrapper.jsonnet index 338bb95..eed3278 100644 --- a/templates/vegeta-wrapper.jsonnet +++ b/templates/General/vegeta-wrapper.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local es = grafana.elasticsearch; // Panels diff --git a/templates/ycsb.jsonnet b/templates/General/ycsb.jsonnet similarity index 99% rename from templates/ycsb.jsonnet rename to templates/General/ycsb.jsonnet index 893cefb..e6fa8c6 100644 --- a/templates/ycsb.jsonnet +++ b/templates/General/ycsb.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local es = grafana.elasticsearch; //Panel definitions From d43a9a625b3500305754d0567f119c38b6c5c250 Mon Sep 17 
00:00:00 2001 From: Vishnu Challa Date: Fri, 29 Sep 2023 15:18:16 -0400 Subject: [PATCH 2/4] Kube-burner Report - OCP Wrapper Dashboard in latest grafonnet library --- Makefile | 39 +- README.md | 24 +- .../panels.libsonnet | 836 +++ .../queries.libsonnet | 1589 ++++++ .../variables.libsonnet | 77 + dittybopper/deploy.sh | 2 +- templates/CPT/k8s-perf.jsonnet | 499 -- .../kube-burner-report-ocp-wrapper-v2.jsonnet | 149 + templates/CPT/kube-burner.jsonnet | 4568 ----------------- templates/jsonnetfile.json | 15 + templates/jsonnetfile.lock.json | 46 + 11 files changed, 2765 insertions(+), 5079 deletions(-) create mode 100644 assets/kube-burner-report-ocp-wrapper/panels.libsonnet create mode 100644 assets/kube-burner-report-ocp-wrapper/queries.libsonnet create mode 100644 assets/kube-burner-report-ocp-wrapper/variables.libsonnet delete mode 100644 templates/CPT/k8s-perf.jsonnet create mode 100644 templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet delete mode 100644 templates/CPT/kube-burner.jsonnet create mode 100644 templates/jsonnetfile.json create mode 100644 templates/jsonnetfile.lock.json diff --git a/Makefile b/Makefile index 2b6a8ff..f504244 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -JSONNET = https://github.com/google/jsonnet/releases/download/v0.17.0/jsonnet-bin-v0.17.0-linux.tar.gz +JB = https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64 BINDIR = bin TEMPLATESDIR = templates OUTPUTDIR = rendered @@ -6,15 +6,24 @@ ALLDIRS = $(BINDIR) $(OUTPUTDIR) SYNCER_IMG_TAG ?= quay.io/cloud-bulldozer/dittybopper-syncer:latest PLATFORM = linux/amd64,linux/arm64,linux/ppc64le,linux/s390x -# Get all templates at $(TEMPLATESDIR) -TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*.jsonnet) +ifeq ($(filter v2,$(MAKECMDGOALS)),v2) + # Set variables and instructions for v2 + TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*-v2.jsonnet) + LIBRARY_PATH := $(TEMPLATESDIR)/vendor + JSONNET := 
https://github.com/cloud-bulldozer/utils/releases/download/v0.0.0/jsonnet-bin-v0.20.0-linux.tar.gz +else + # Get all templates at $(TEMPLATESDIR) + TEMPLATES := $(filter-out %-v2.jsonnet, $(wildcard $(TEMPLATESDIR)/**/*.jsonnet)) + LIBRARY_PATH := $(TEMPLATESDIR)/grafonnet-lib + JSONNET := https://github.com/google/jsonnet/releases/download/v0.17.0/jsonnet-bin-v0.17.0-linux.tar.gz +endif # Replace $(TEMPLATESDIR)/*.jsonnet by $(OUTPUTDIR)/*.json outputs := $(patsubst $(TEMPLATESDIR)/%.jsonnet, $(OUTPUTDIR)/%.json, $(TEMPLATES)) all: deps format build -deps: $(ALLDIRS) $(TEMPLATESDIR)/grafonnet-lib $(BINDIR)/jsonnet +deps: $(ALLDIRS) $(BINDIR)/jsonnet $(LIBRARY_PATH) $(ALLDIRS): mkdir -p $(ALLDIRS) @@ -22,24 +31,34 @@ $(ALLDIRS): format: deps $(BINDIR)/jsonnetfmt -i $(TEMPLATES) -build: deps $(TEMPLATESDIR)/grafonnet-lib $(outputs) +build: deps $(LIBRARY_PATH) $(outputs) clean: @echo "Cleaning up" - rm -rf $(ALLDIRS) $(TEMPLATESDIR)/grafonnet-lib - -$(TEMPLATESDIR)/grafonnet-lib: - git clone --depth 1 https://github.com/grafana/grafonnet-lib.git $(TEMPLATESDIR)/grafonnet-lib + rm -rf $(ALLDIRS) $(TEMPLATESDIR)/vendor $(TEMPLATESDIR)/grafonnet-lib $(BINDIR)/jsonnet: @echo "Downloading jsonnet binary" curl -s -L $(JSONNET) | tar xz -C $(BINDIR) + @echo "Downloading jb binary" + curl -s -L $(JB) -o $(BINDIR)/jb + chmod +x $(BINDIR)/jb + +$(TEMPLATESDIR)/grafonnet-lib: + git clone --depth 1 https://github.com/grafana/grafonnet-lib.git $(TEMPLATESDIR)/grafonnet-lib + +$(TEMPLATESDIR)/vendor: + @echo "Downloading vendor files" + cd $(TEMPLATESDIR) && ../$(BINDIR)/jb install && cd ../ # Build each template and output to $(OUTPUTDIR) $(OUTPUTDIR)/%.json: $(TEMPLATESDIR)/%.jsonnet @echo "Building template $<" mkdir -p $(dir $@) - $(BINDIR)/jsonnet $< > $@ + $(BINDIR)/jsonnet -J ./$(LIBRARY_PATH) $< > $@ + +v2: all + @echo "Rendered the v2 dashboards with latest grafonnet library" build-syncer-image: build podman build --platform=${PLATFORM} -f Dockerfile 
--manifest=${SYNCER_IMG_TAG} . diff --git a/README.md b/README.md index e30f773..476c6bc 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,28 @@ bin/jsonnet templates/ocp-performance.jsonnet > rendered/ocp-performance.json $ ls rendered ocp-ingress-controller.json ocp-performance.json ``` +Similarly for V2, the dashboards that are built using latest grafonnet library, use +``` +$ make v2 +mkdir -p bin rendered +Downloading jsonnet binary +curl -s -L https://github.com/cloud-bulldozer/utils/releases/download/v0.0.0/jsonnet-bin-v0.20.0-linux.tar.gz | tar xz -C bin +Downloading jb binary +curl -s -L https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64 -o bin/jb +chmod +x bin/jb +Downloading vendor files +cd templates && ../bin/jb install && cd ../ +GET https://github.com/grafana/grafonnet/archive/f40876da40d787e9c288de0b547ac85597c781d9.tar.gz 200 +GET https://github.com/grafana/grafonnet/archive/f40876da40d787e9c288de0b547ac85597c781d9.tar.gz 200 +GET https://github.com/jsonnet-libs/docsonnet/archive/cc9df63eaca56f39e8e4e1ce192141333257b08d.tar.gz 200 +GET https://github.com/jsonnet-libs/xtd/archive/0256a910ac71f0f842696d7bca0bf01ea77eb654.tar.gz 200 +bin/jsonnetfmt -i templates/General/ocp-performance-v2.jsonnet +Building template templates/General/ocp-performance-v2.jsonnet +mkdir -p rendered/General/ +bin/jsonnet -J ./templates/vendor templates/General/ocp-performance-v2.jsonnet > rendered/General/ocp-performance-v2.json +Rendered the v2 dashboards with latest grafonnet library +``` +Rest all operations reamin same as before. In order to clean up the environment execute `make clean`. 
@@ -73,4 +95,4 @@ In addition, make sure to lint your modifications to jsonnet files if you don't The dashboards from this repository have been tested with the following versions: -- Grafana 7.X +- Grafana 7.X \ No newline at end of file diff --git a/assets/kube-burner-report-ocp-wrapper/panels.libsonnet b/assets/kube-burner-report-ocp-wrapper/panels.libsonnet new file mode 100644 index 0000000..b240e9c --- /dev/null +++ b/assets/kube-burner-report-ocp-wrapper/panels.libsonnet @@ -0,0 +1,836 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +{ + timeSeries: { + local timeSeries = g.panel.timeSeries, + local custom = timeSeries.fieldConfig.defaults.custom, + local options = timeSeries.options, + + base(title, unit, targets, gridPos, maxPoints): + timeSeries.new(title) + + timeSeries.queryOptions.withTargets(targets) + + timeSeries.queryOptions.withMaxDataPoints(maxPoints) + + timeSeries.datasource.withType('elasticsearch') + + timeSeries.datasource.withUid('$Datasource') + + timeSeries.standardOptions.withUnit(unit) + + timeSeries.gridPos.withX(gridPos.x) + + timeSeries.gridPos.withY(gridPos.y) + + timeSeries.gridPos.withH(gridPos.h) + + timeSeries.gridPos.withW(gridPos.w) + + custom.withSpanNulls(true) + + custom.withFillOpacity(10) + + options.tooltip.withMode('multi') + + options.tooltip.withSort('desc') + + options.legend.withShowLegend(true) + + options.legend.withPlacement('bottom') + + options.legend.withDisplayMode('table'), + + withCommonAggregations(title, unit, targets, gridPos, maxPoints): + self.base(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + 'mean', + 'max', + 'min' + ]), + + withCommonAggregationsRightPlacement(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withPlacement('right'), + + meanWithRightLegend(title, unit, targets, gridPos, maxPoints): + self.base(title, unit, targets, gridPos, 
maxPoints) + + options.legend.withCalcs([ + 'mean' + ]) + + options.legend.withPlacement('right'), + + withMeanMax(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + 'mean', + 'max', + ]), + + withMinMax(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + 'max', + 'min', + ]), + + sortByMeanCommon(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withSortBy('Mean') + + options.legend.withSortDesc(true), + + sortByMaxCommon(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withSortBy('Max') + + options.legend.withSortDesc(true), + + sortByMean(title, unit, targets, gridPos, maxPoints): + self.withMeanMax(title, unit, targets, gridPos, maxPoints) + + options.legend.withSortBy('Mean') + + options.legend.withSortDesc(true), + + sortByMax(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + 'max', + 'mean', + ]) + + options.legend.withSortBy('Max') + + options.legend.withSortDesc(true), + + sortByMin(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withSortBy('Min') + + options.legend.withSortDesc(false), + + meanWithRightLegendCommons(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + 'mean', + 'max', + 'lastNotNull', + ]) + + options.legend.withPlacement('right') + + options.legend.withSortBy('Mean') + + options.legend.withSortDesc(true), + + maxMeanWithRightLegend(title, unit, targets, gridPos, maxPoints): + 
self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + 'mean', + 'max', + ]) + + options.legend.withPlacement('right'), + + minMaxWithRightLegend(title, unit, targets, gridPos, maxPoints): + self.withMinMax(title, unit, targets, gridPos, maxPoints) + + options.legend.withPlacement('right'), + + sortMaxWithRightLegend(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + 'lastNotNull', + 'max', + ]) + + options.legend.withPlacement('right') + + options.legend.withSortBy('Max') + + options.legend.withSortDesc(true), + + maxWithRightLegend(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + 'lastNotNull', + 'max', + ]) + + options.legend.withPlacement('right'), + + allWithRightLegend(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + "max", + "min", + "firstNotNull", + "lastNotNull" + ]) + + options.legend.withPlacement('right'), + + maxWithBottomLegend(title, unit, targets, gridPos, maxPoints): + self.withCommonAggregations(title, unit, targets, gridPos, maxPoints) + + options.legend.withCalcs([ + 'max', + 'lastNotNull', + ]) + + options.legend.withSortBy('Max') + + options.legend.withSortDesc(true), + + workerCPUCustomOverrides(title, unit, targets, gridPos, maxPoints): + self.withMeanMax(title, unit, targets, gridPos, maxPoints) + + options.legend.withPlacement('right') + + timeSeries.standardOptions.withOverrides([ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "user", + "system", + "softirq", + "iowait", + "irq" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + 
"tooltip": false, + "viz": true + } + } + ] + } + ]), + + kupeApiCustomOverrides(title, unit, targets, gridPos, maxPoints): + self.sortByMax(title, unit, targets, gridPos, maxPoints) + + options.tooltip.withMode('multi') + + options.legend.withSortDesc(false) + + timeSeries.standardOptions.withOverrides([ + { + "matcher": { + "id": "byRegexp", + "options": "/Rss.*/" + }, + "properties": [ + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ]), + kupeApiAverageCustomOverrides(title, unit, targets, gridPos, maxPoints): + self.withMeanMax(title, unit, targets, gridPos, maxPoints) + + timeSeries.standardOptions.withOverrides([ + { + "matcher": { + "id": "byRegexp", + "options": "/Rss.*/" + }, + "properties": [ + { + "id": "custom.showPoints", + "value": "auto" + }, + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Avg CPU kube-apiserver" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ]), + activeKubeControllerManagerOverrides(title, unit, targets, gridPos, maxPoints): + self.sortByMax(title, unit, targets, gridPos, maxPoints) + + timeSeries.standardOptions.withOverrides([ + { + "matcher": { + "id": "byRegexp", + "options": "/Rss.*/" + }, + "properties": [ + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ]), + kubeSchedulerUsageOverrides(title, unit, targets, gridPos, maxPoints): + self.withMeanMax(title, unit, targets, gridPos, maxPoints) + + timeSeries.standardOptions.withOverrides([ + { + "matcher": { + "id": "byRegexp", + "options": "/Rss.*/" + }, + "properties": [ + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ]), + 
etcd99thNetworkPeerRTOverrides(title, unit, targets, gridPos, maxPoints): + self.withMeanMax(title, unit, targets, gridPos, maxPoints) + + timeSeries.standardOptions.withOverrides([ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Logical.*/" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + }, + { + "id": "custom.axisPlacement", + "value": "hidden" + } + ] + } + ]), + etcdResouceUtilizationOverrides(title, unit, targets, gridPos, maxPoints): + self.sortByMaxCommon(title, unit, targets, gridPos, maxPoints) + + timeSeries.standardOptions.withOverrides([ + { + "matcher": { + "id": "byRegexp", + "options": "/Rss.*/" + }, + "properties": [ + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ]), + etcd99thDiskWalLatencyOverrides(title, unit, targets, gridPos, maxPoints): + self.sortByMean(title, unit, targets, gridPos, maxPoints) + + timeSeries.standardOptions.thresholds.withMode("absolute") + + custom.withThresholdsStyle({ + "mode": "line+area" + }) + + timeSeries.standardOptions.thresholds.withSteps([ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ]) + + timeSeries.standardOptions.color.withMode("palette-classic"), + etcd99thCommitLatencyOverrides(title, unit, targets, gridPos, maxPoints): + self.withMeanMax(title, unit, targets, gridPos, maxPoints) + + timeSeries.standardOptions.thresholds.withMode("absolute") + + custom.withThresholdsStyle({ + "mode": "line+area" + }) + + timeSeries.standardOptions.thresholds.withSteps([ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 0.02 + } + ]) + + timeSeries.standardOptions.color.withMode("palette-classic"), + readOnlyAPIRequestp99ResourceOverrides(title, unit, targets, gridPos, maxPoints): + self.sortByMax(title, unit, targets, gridPos, maxPoints) + + custom.withThresholdsStyle({ + "mode": "line+area" + }) + + timeSeries.standardOptions.thresholds.withSteps([ + { 
+ "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 1 + } + ]), + readOnlyAPIRequestp99NamespaceOverrides(title, unit, targets, gridPos, maxPoints): + self.withMeanMax(title, unit, targets, gridPos, maxPoints) + + custom.withThresholdsStyle({ + "mode": "line+area" + }) + + timeSeries.standardOptions.thresholds.withSteps([ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 5 + } + ]), + readOnlyAPIRequestp99ClusterOverrides(title, unit, targets, gridPos, maxPoints): + self.withMeanMax(title, unit, targets, gridPos, maxPoints) + + custom.withThresholdsStyle({ + "mode": "line+area" + }) + + timeSeries.standardOptions.thresholds.withSteps([ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 30 + } + ]), + readOnlyAPIRequestp99MutatingOverrides(title, unit, targets, gridPos, maxPoints): + self.sortByMax(title, unit, targets, gridPos, maxPoints) + + custom.withThresholdsStyle({ + "mode": "line+area" + }) + + timeSeries.standardOptions.thresholds.withSteps([ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 1 + } + ]), + }, + stat: { + local stat = g.panel.stat, + local options = stat.options, + + base(title, unit, targets, gridPos): + stat.new(title) + + stat.datasource.withType('elasticsearch') + + stat.datasource.withUid('$Datasource') + + stat.standardOptions.withUnit(unit) + + stat.queryOptions.withTargets(targets) + + stat.gridPos.withX(gridPos.x) + + stat.gridPos.withY(gridPos.y) + + stat.gridPos.withH(gridPos.h) + + stat.gridPos.withW(gridPos.w) + + options.withJustifyMode("center"), + + withMeanCalcs(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.reduceOptions.withCalcs([ + 'mean', + ]), + + withLastNotNullCalcs(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.reduceOptions.withCalcs([ + 'lastNotNull', + ]), + + withFieldSummary(title, unit, field, targets, 
gridPos): + self.withLastNotNullCalcs(title, unit, targets, gridPos) + + options.reduceOptions.withFields(field), + + withMeanThresholds(title, unit, targets, gridPos): + self.withMeanCalcs(title, unit, targets, gridPos) + + stat.standardOptions.thresholds.withMode("absolute") + + stat.standardOptions.thresholds.withSteps([{"value": null,"color": "green"}, {"value": 5000,"color": "red"}]) + + stat.standardOptions.color.withMode("palette-classic"), + }, + table: { + local table = g.panel.table, + local options = table.options, + + base(title, unit, targets, gridPos): + table.new(title) + + table.datasource.withType('elasticsearch') + + table.datasource.withUid('$Datasource') + + table.standardOptions.withUnit(unit) + + table.queryOptions.withTargets(targets) + + table.gridPos.withX(gridPos.x) + + table.gridPos.withY(gridPos.y) + + table.gridPos.withH(gridPos.h) + + table.gridPos.withW(gridPos.w), + + withPagination(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.footer.TableFooterOptions.withEnablePagination(true), + + withAlerts(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + table.queryOptions.withTransformations([ + { + "id": "organize", + "options": { + "excludeByName": { + "_id": true, + "_index": true, + "_type": true, + "highlight": true, + "metricName": true, + "sort": true, + "uuid": true + }, + "indexByName": {}, + "renameByName": { + "_type": "Desciption", + "severity": "Severity", + "timestamp": "Timestamp" + } + } + } + ]), + + withLatencyTableOverrides(title, unit, targets, gridPos): + self.withPagination(title, unit, targets, gridPos) + + table.options.withSortBy([ + { + "desc": true, + "displayName": "Initialized" + } + ]) + + table.queryOptions.withTransformations([ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Average containersReadyLatency": "ContainersReady", + "Average initializedLatency": "Initialized", + "Average 
podReadyLatency": "Ready", + "Average schedulingLatency": "Scheduling", + "namespace.keyword": "Namespace", + "podName.keyword": "Pod" + } + } + } + ]) + + table.standardOptions.withOverrides([ + { + "matcher": { + "id": "byName", + "options": "nodeName.keyword" + }, + "properties": [ + { + "id": "custom.width", + "value": 412 + } + ] + } + ]), + + withJobSummary(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + table.options.withSortBy([]) + + table.queryOptions.withTransformations([ + { + "id": "organize", + "options": { + "excludeByName": { + "_id": true, + "_index": true, + "_type": true, + "highlight": true, + "jobConfig.churnDelay": true, + "jobConfig.churnDuration": true, + "jobConfig.churnPercent": true, + "jobConfig.cleanup": true, + "jobConfig.errorOnVerify": true, + "jobConfig.jobIterationDelay": true, + "jobConfig.jobIterations": false, + "jobConfig.jobPause": true, + "jobConfig.jobType": true, + "jobConfig.maxWaitTimeout": true, + "jobConfig.name": true, + "jobConfig.namespace": true, + "jobConfig.namespaceLabels.pod-security.kubernetes.io/audit": true, + "jobConfig.namespaceLabels.pod-security.kubernetes.io/enforce": true, + "jobConfig.namespaceLabels.pod-security.kubernetes.io/warn": true, + "jobConfig.namespaceLabels.security.openshift.io/scc.podSecurityLabelSync": true, + "jobConfig.namespaced": true, + "jobConfig.namespacedIterations": true, + "jobConfig.objects": true, + "jobConfig.podWait": true, + "jobConfig.preLoadImages": true, + "jobConfig.preLoadPeriod": true, + "jobConfig.verifyObjects": true, + "jobConfig.waitFor": true, + "jobConfig.waitForDeletion": true, + "jobConfig.waitWhenFinished": true, + "metadata.cloud-bulldozer": true, + "metadata.k8sVersion": true, + "metadata.ocpVersion": true, + "metadata.platform": true, + "metadata.sdnType": true, + "metadata.totalNodes": true, + "metricName": true, + "sort": true, + "timestamp": true, + "uuid": false + }, + "indexByName": { + "_id": 1, + "_index": 2, + 
"_type": 3, + "elapsedTime": 8, + "jobConfig.burst": 7, + "jobConfig.cleanup": 12, + "jobConfig.errorOnVerify": 13, + "jobConfig.jobIterationDelay": 14, + "jobConfig.jobIterations": 9, + "jobConfig.jobPause": 15, + "jobConfig.jobType": 10, + "jobConfig.maxWaitTimeout": 16, + "jobConfig.name": 5, + "jobConfig.namespace": 17, + "jobConfig.namespacedIterations": 18, + "jobConfig.objects": 19, + "jobConfig.podWait": 11, + "jobConfig.qps": 6, + "jobConfig.verifyObjects": 20, + "jobConfig.waitFor": 21, + "jobConfig.waitForDeletion": 22, + "jobConfig.waitWhenFinished": 23, + "metricName": 24, + "timestamp": 0, + "uuid": 4 + }, + "renameByName": { + "_type": "", + "elapsedTime": "Elapsed time", + "elapsedTimeNs": "Elapsed Time", + "highlight": "", + "jobConfig.burst": "Burst", + "jobConfig.churn": "Churn", + "jobConfig.churnDelay": "", + "jobConfig.cleanup": "", + "jobConfig.errorOnVerify": "errorOnVerify", + "jobConfig.iterationsPerNamespace": "iterationsPerNs", + "jobConfig.jobIterationDelay": "jobIterationDelay", + "jobConfig.jobIterations": "Iterations", + "jobConfig.jobPause": "jobPause", + "jobConfig.jobType": "Job Type", + "jobConfig.maxWaitTimeout": "maxWaitTImeout", + "jobConfig.name": "Name", + "jobConfig.namespace": "namespacePrefix", + "jobConfig.namespaceLabels.pod-security.kubernetes.io/audit": "", + "jobConfig.namespaced": "", + "jobConfig.namespacedIterations": "Namespaced iterations", + "jobConfig.objects": "", + "jobConfig.podWait": "podWait", + "jobConfig.preLoadImages": "Preload Images", + "jobConfig.preLoadPeriod": "", + "jobConfig.qps": "QPS", + "jobConfig.verifyObjects": "", + "metadata.platform": "Platform", + "metricName": "", + "timestamp": "", + "uuid": "UUID", + "version": "Kube-burner version" + } + } + } + ]) + + table.standardOptions.withOverrides([ + { + "matcher": { + "id": "byName", + "options": "Elapsed time" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Elapsed 
Time" + }, + "properties": [ + { + "id": "unit", + "value": "ns" + } + ] + } + ]), + + withClusterMetadata(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + table.options.withSortBy([]) + + table.standardOptions.withMappings([ + { + "options": { + "passed": { + "color": "green", + "index": 0 + } + }, + "type": "value" + } + ]) + + table.queryOptions.withTransformations([ + { + "id": "organize", + "options": { + "excludeByName": { + "_id": true, + "_index": true, + "_type": true, + "benchmark": false, + "clustertype": true, + "endDate": true, + "end_date": true, + "highlight": true, + "jobConfig.cleanup": true, + "jobConfig.errorOnVerify": true, + "jobConfig.jobIterationDelay": true, + "jobConfig.jobIterations": false, + "jobConfig.jobPause": true, + "jobConfig.maxWaitTimeout": true, + "jobConfig.namespace": true, + "jobConfig.namespaced": true, + "jobConfig.namespacedIterations": false, + "jobConfig.objects": true, + "jobConfig.preLoadPeriod": true, + "jobConfig.verifyObjects": true, + "jobConfig.waitFor": true, + "jobConfig.waitForDeletion": true, + "jobConfig.waitWhenFinished": true, + "metricName": true, + "ocp_version": true, + "platform": false, + "sdn_type": false, + "sort": true, + "timestamp": true, + "total_nodes": false, + "uuid": true, + "workload": true, + "workload_nodes_count": true, + "workload_nodes_type": true + }, + "indexByName": { + "_id": 1, + "_index": 2, + "_type": 3, + "benchmark": 5, + "clusterName": 8, + "endDate": 9, + "highlight": 6, + "infraNodesCount": 20, + "infraNodesType": 21, + "k8sVersion": 10, + "masterNodesType": 16, + "metricName": 13, + "ocpVersion": 11, + "passed": 15, + "platform": 12, + "sdnType": 14, + "sort": 7, + "timestamp": 0, + "totalNodes": 17, + "uuid": 4, + "workerNodesCount": 18, + "workerNodesType": 19 + }, + "renameByName": { + "_type": "", + "clusterName": "Cluster", + "elapsedTime": "Elapsed time", + "endDate": "", + "infraNodesCount": "infra count", + "infraNodesType": "infra 
type", + "infra_nodes_count": "Infra nodes", + "infra_nodes_type": "Infra flavor", + "jobConfig.burst": "Burst", + "jobConfig.cleanup": "", + "jobConfig.errorOnVerify": "errorOnVerify", + "jobConfig.jobIterationDelay": "jobIterationDelay", + "jobConfig.jobIterations": "Iterations", + "jobConfig.jobPause": "jobPause", + "jobConfig.jobType": "Job Type", + "jobConfig.maxWaitTimeout": "maxWaitTImeout", + "jobConfig.name": "Name", + "jobConfig.namespace": "namespacePrefix", + "jobConfig.namespaced": "", + "jobConfig.namespacedIterations": "Namespaced iterations", + "jobConfig.objects": "", + "jobConfig.podWait": "podWait", + "jobConfig.preLoadImages": "Preload Images", + "jobConfig.preLoadPeriod": "", + "jobConfig.qps": "QPS", + "jobConfig.verifyObjects": "", + "k8sVersion": "k8s version", + "k8s_version": "k8s version", + "masterNodesType": "master type", + "master_nodes_count": "Master nodes", + "master_nodes_type": "Masters flavor", + "metricName": "", + "ocpVersion": "OCP version", + "passed": "Passed", + "platform": "Platform", + "result": "Result", + "sdnType": "SDN", + "sdn_type": "SDN", + "timestamp": "", + "totalNodes": "total nodes", + "total_nodes": "Total nodes", + "uuid": "UUID", + "workerNodesCount": "worker count", + "workerNodesType": "worker type", + "worker_nodes_count": "Worker nodes", + "worker_nodes_type": "Workers flavor", + "workload": "", + "workload_nodes_count": "Workload nodes", + "workload_nodes_type": "Workload flavor" + } + } + } + ]) + + table.standardOptions.withOverrides([ + { + "matcher": { + "id": "byName", + "options": "passed" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + } + ] + } + ]), + } +} \ No newline at end of file diff --git a/assets/kube-burner-report-ocp-wrapper/queries.libsonnet b/assets/kube-burner-report-ocp-wrapper/queries.libsonnet new file mode 100644 index 0000000..17b0194 --- /dev/null +++ 
b/assets/kube-burner-report-ocp-wrapper/queries.libsonnet @@ -0,0 +1,1589 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local variables = import './variables.libsonnet'; +local elasticsearch = g.query.elasticsearch; + +{ + averagePodLatency: { + query(): + elasticsearch.withAlias("{{field}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField('timestamp') + + elasticsearch.bucketAggs.DateHistogram.withId("5") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount(1) + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("podReadyLatency") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("4") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("schedulingLatency") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("3") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("initializedLatency") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("2") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("containersReadyLatency") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement') + + elasticsearch.withTimeField('timestamp') + }, + podLatenciesSummary: { + 
query(): + elasticsearch.withAlias("$latencyPercentile {{term quantileName.keyword}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("quantileName.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount(0) + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Max.withField("$latencyPercentile") + + elasticsearch.metrics.MetricAggregationWithSettings.Max.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Max.withType('max'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: podLatencyQuantilesMeasurement') + + elasticsearch.withTimeField('timestamp') + }, + podConditionsLatency: { + query(): + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("namespace.keyword") + + elasticsearch.bucketAggs.Terms.withId("7") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("100"), + elasticsearch.bucketAggs.Terms.withField("nodeName.keyword") + + elasticsearch.bucketAggs.Terms.withId("6") + + 
elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("100"), + elasticsearch.bucketAggs.Terms.withField("podName.keyword") + + elasticsearch.bucketAggs.Terms.withId("5") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("100"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("schedulingLatency") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("4") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("initializedLatency") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("3") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("containersReadyLatency") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("2") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("podReadyLatency") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement') + + elasticsearch.withQueryType('randomWalk') + + elasticsearch.withTimeField('timestamp') + }, + top10ContainerRuntimeNetworkSetupLatency: { + 
query(): + elasticsearch.withAlias("{{labels.node.keyword}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.node.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: containerNetworkSetupLatency') + + elasticsearch.withTimeField('timestamp') + }, + schedulingThroughput: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + 
elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: schedulingThroughput') + + elasticsearch.withTimeField('timestamp') + }, + mastersCPUUtilization: { + queries(): [ + elasticsearch.withAlias("{{labels.instance.keyword}}") + + elasticsearch.withHide(false) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.instance.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.settings.withScript("_value * 100") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: \"nodeCPU-Masters\" AND NOT labels.mode.keyword: idle AND NOT labels.mode.keyword: steal") + + elasticsearch.withTimeField('timestamp'), + elasticsearch.withAlias("Aggregated") + + elasticsearch.withHide(false) + + elasticsearch.withBucketAggs([ + 
elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.settings.withScript("_value * 100") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: nodeCPU-Masters AND NOT labels.mode.keyword: idle AND NOT labels.mode.keyword: steal") + + elasticsearch.withTimeField('timestamp') + ] + }, + mastersMemoryUtilization: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withHide(false) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.instance.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + 
elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("Utilization {{labels.instance.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: nodeMemoryUtilization-Masters"), + self.base("Total {{labels.instance.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: nodeMemoryTotal-Masters"), + elasticsearch.withAlias("Aggregated utilization") + + elasticsearch.withHide(false) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: nodeMemoryUtilization-Masters") + + elasticsearch.withTimeField('timestamp') + ] + }, + nodeStatusSummary: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.condition.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + 
elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: \"nodeStatus\"") + + elasticsearch.withTimeField('timestamp') + }, + podStatusSummary: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.phase.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: 
$uuid AND metricName.keyword: \"podStatusCount\"") + + elasticsearch.withTimeField('timestamp') + }, + kubeApiServerUsage: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("5") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.Terms.withField("labels.namespace.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + 
elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: \"containerCPU\" AND labels.container.keyword: kube-apiserver"), + self.base("Rss {{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: \"containerMemory\" AND labels.container.keyword: kube-apiserver"), + self.base("Rss {{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: \"containerMemory-Masters\" AND labels.container.keyword: kube-apiserver"), + self.base("{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: \"containerCPU-Masters\" AND labels.container.keyword: kube-apiserver"), + ] + }, + averageKubeApiServerUsage: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto'), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + 
elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("Avg CPU kube-apiserver", "uuid.keyword: $uuid AND metricName: \"containerCPU\" AND labels.container.keyword: kube-apiserver"), + self.base("Avg Rss kube-apiserver", "uuid.keyword: $uuid AND metricName: \"containerMemory\" AND labels.container.keyword: kube-apiserver"), + ] + }, + activeKubeControllerManagerUsage: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("5") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("1"), + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.Terms.withField("labels.namespace.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + 
elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: \"containerCPU\" AND labels.container.keyword: kube-controller-manager"), + self.base("Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: \"containerMemory\" AND labels.container.keyword: kube-controller-manager"), + self.base("{{labels.namespace.keyword}}-{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: \"containerCPU-Masters\" AND labels.container.keyword: kube-controller-manager"), + self.base("Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: \"containerMemory-Masters\" AND labels.container.keyword: kube-controller-manager"), + ] + }, + kubeSchedulerUsage: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("5") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + 
elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.Terms.withField("labels.namespace.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName: \"containerCPU\" AND labels.container.keyword: kube-scheduler"), + self.base("Rss {{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName: \"containerMemory\" AND labels.container.keyword: kube-scheduler"), + self.base("{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName: \"containerCPU-Masters\" AND labels.container.keyword: kube-scheduler"), + self.base("Rss {{labels.pod.keyword}}", "uuid.keyword: $uuid AND 
metricName: \"containerMemory-Masters\" AND labels.container.keyword: kube-scheduler"), + ] + }, + nodeCount: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.role.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.Count.withId("1") + + elasticsearch.metrics.Count.withType('count'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName: \"nodeRoles\"") + + elasticsearch.withTimeField('timestamp') + }, + aggregatesCount: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + 
elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("Namespaces", "uuid.keyword: $uuid AND metricName: \"namespaceCount\" AND labels.phase: \"Active\""), + self.base("Services", "uuid.keyword: $uuid AND metricName: \"serviceCount\""), + self.base("Deployments", "uuid.keyword: $uuid AND metricName: \"deploymentCount\""), + self.base("Secrets", "uuid.keyword: $uuid AND metricName.keyword: \"secretCount\""), + self.base("ConfigMaps", "uuid.keyword: $uuid AND metricName.keyword: \"configmapCount\""), + ] + }, + openshiftVersion: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.settings.withSize("500") + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withType('raw_data'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: \"etcdVersion\"") + + elasticsearch.withTimeField('timestamp') + }, + jobSummary: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withHide(false) + + elasticsearch.withBucketAggs([]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.settings.withSize("500") + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withType('raw_data'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: jobSummary") + + elasticsearch.withTimeField('timestamp') + }, + clusterMetadata: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withHide(false) + + elasticsearch.withBucketAggs([]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withId("1") + + 
elasticsearch.metrics.MetricAggregationWithSettings.RawData.settings.withSize("500") + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withType('raw_data'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: clusterMetadata") + + elasticsearch.withTimeField('timestamp') + }, + alerts: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.settings.withSize("500") + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withType('raw_data'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: alert") + + elasticsearch.withTimeField('timestamp') + }, + ovnKubeMasterPodStats: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTimeZone("utc") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField("value") + + 
elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(metric): [ + self.base("{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.pod.keyword: /ovnkube-master.*/"), + self.base("{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.pod.keyword: /ovnkube-control-plane.*/"), + ] + }, + ovnKubeMasterStats: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.DateHistogram.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTimeZone("utc") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges("0"), + ]) + + 
elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(metric): [ + self.base("{{labels.pod.keyword}}-{{labels.container.keyword}}", "uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.pod.keyword: /ovnkube-master.*/"), + self.base("{{labels.pod.keyword}}-{{labels.container.keyword}}", "uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.pod.keyword: /ovnkube-control-plane.*/"), + ] + }, + ovnKubeNodePodStats: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("5"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTimeZone("utc") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges("0"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + 
elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(metric): [ + self.base("{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.namespace.keyword: \"openshift-ovn-kubernetes\" AND labels.pod.keyword: /ovnkube-node.*/"), + elasticsearch.withAlias('Aggregated') + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTimeZone("utc") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges("0"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.namespace.keyword: \"openshift-ovn-kubernetes\" AND labels.pod.keyword: /ovnkube-node.*/") + + elasticsearch.withTimeField('timestamp'), + ] + }, + ovnControllerStats: { + query(metric): + elasticsearch.withAlias("{{labels.pod.keyword}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("5"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + 
elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTimeZone("utc") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges("0"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.namespace.keyword: \"openshift-ovn-kubernetes\" AND labels.pod.keyword: /ovnkube-node.*/ AND labels.container.keyword: \"ovn-controller\"") + + elasticsearch.withTimeField('timestamp') + }, + aggregatedOVNKubeMasterStats: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTimeZone("utc") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges("0"), + 
]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(metric): [ + self.base("", "uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.pod.keyword: /ovnkube-master.*/"), + self.base("","uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.pod.keyword: /ovnkube-control-plane.*/"), + ], + }, + aggregatedOVNKubeNodeStats: { + query(metric): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTimeZone("utc") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges("0"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND 
labels.namespace.keyword: \"openshift-ovn-kubernetes\" AND labels.pod.keyword: /ovnkube-node.*/") + + elasticsearch.withTimeField('timestamp') + }, + etcd99thLatencies: { + query(metric): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges("0"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: \""+ metric +"\"") + + elasticsearch.withTimeField('timestamp') + }, + etcdLeaderChanges: { + query(): + elasticsearch.withAlias("Etcd leader changes") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + 
elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: etcdLeaderChangesRate") + + elasticsearch.withTimeField('timestamp') + }, + etcd99thNetworkPeerRT: { + query(): + elasticsearch.withAlias("{{labels.pod.keyword}} to {{labels.To.keyword}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.Terms.withField("labels.To.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") 
+ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: 99thEtcdRoundTripTimeSeconds") + + elasticsearch.withTimeField('timestamp') + }, + etcdResourceUtilization: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges("0"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("Rss {{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: containerMemory* AND labels.container.keyword: etcd"), + self.base("{{labels.pod.keyword}}", "uuid.keyword: $uuid AND metricName.keyword: containerCPU* AND labels.container.keyword: etcd"), + ], + }, + readOnlyAPILatencyResource: { + query(): + elasticsearch.withAlias("") + + 
elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.verb.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount(0) + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.Terms.withField("labels.resource.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: resource") + + elasticsearch.withTimeField('timestamp') + }, + readOnlyAPILatencyNamespace: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.verb.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + 
elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount(0) + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: namespace") + + elasticsearch.withTimeField('timestamp') + }, + readOnlyAPILatencyCluster: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.verb.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount(0) + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + 
elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: cluster") + + elasticsearch.withTimeField('timestamp') + }, + readOnlyAPILatencyMutating: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.verb.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount(0) + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: mutatingAPICallsLatency") + + elasticsearch.withTimeField('timestamp') + }, + serviceSyncLatency: { + query(): + 
elasticsearch.withAlias("Latency") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: serviceSyncLatency") + + elasticsearch.withTimeField('timestamp') + }, + apiRequestRate: { + query(): + elasticsearch.withAlias("{{labels.verb.keyword}} {{labels.resource.keyword}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.resource.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.Terms.withField("labels.verb.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + 
elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: APIRequestRate") + + elasticsearch.withTimeField('timestamp') + }, + top5KubeletProcessByCpuUsage: { + queries(): [ + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.node.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("5"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND 
metricName.keyword: kubeletCPU") + + elasticsearch.withTimeField('timestamp'), + elasticsearch.withAlias("Average across workers") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: kubeletCPU") + + elasticsearch.withTimeField('timestamp'), + ], + }, + top5CrioProcessByCpuUsage: { + queries(): [ + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.node.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("5"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + 
elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: crioCPU") + + elasticsearch.withTimeField('timestamp'), + elasticsearch.withAlias("Average across workers") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: crioCPU") + + elasticsearch.withTimeField('timestamp'), + ], + }, + top5KubeletRSSByMemoryUsage: { + queries(): [ + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.node.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("5"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + 
elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: kubeletMemory") + + elasticsearch.withTimeField('timestamp'), + elasticsearch.withAlias("Average across workers") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1"), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: kubeletMemory") + + elasticsearch.withTimeField('timestamp'), + ], + }, + top5CrioRSSByMemoryUsage: { + queries(): [ + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.node.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("5"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + 
elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: crioMemory") + + elasticsearch.withTimeField('timestamp'), + elasticsearch.withAlias("Average across workers") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: crioMemory") + + elasticsearch.withTimeField('timestamp'), + ], + }, + mastersContainerStats: { + query(metric): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + 
+ elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto'), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.node.keyword: $master") + + elasticsearch.withTimeField('timestamp') + }, + masterCPU: { + query(): + elasticsearch.withAlias("{{labels.mode.keyword}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.mode.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + 
elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withSettings({ + "script": { + "inline": "_value*100" + } + }) + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: \"nodeCPU-Masters\" AND labels.instance.keyword: $master") + + elasticsearch.withTimeField('timestamp') + }, + masterMemory: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("Available", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryAvailable-Masters\" AND labels.instance.keyword: $master"), + self.base("Total", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryTotal-Masters\" AND 
labels.instance.keyword: $master"), + self.base("Utilization", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryUtilization-Masters\" AND labels.instance.keyword: $master"), + ], + }, + workersContainerStats: { + query(metric): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto'), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: "+ metric +" AND labels.node.keyword: $worker") + + elasticsearch.withTimeField('timestamp') + }, + workerCPU: { + query(): + elasticsearch.withAlias("{{labels.mode.keyword}}") + + elasticsearch.withBucketAggs([ + 
elasticsearch.bucketAggs.Terms.withField("labels.mode.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withSettings({ + "script": { + "inline": "_value*100" + } + }) + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: \"nodeCPU-Workers\" AND labels.instance.keyword: \"$worker\"") + + elasticsearch.withTimeField('timestamp') + }, + workerMemory: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + 
elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("available", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryAvailable-Workers\" AND labels.instance.keyword: \"$worker\""), + self.base("Total", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryTotal-Workers\" AND labels.instance.keyword: $worker"), + self.base("Utilization", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryUtilization-Workers\" AND labels.instance.keyword: $worker"), + ], + }, + infraContainerStats: { + queries(metric): [ + elasticsearch.withAlias("{{labels.pod.keyword}}: {{labels.container.keyword}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("0"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + 
elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName: \""+ metric +"\" AND labels.node.keyword: \"$infra\" AND labels.namespace.keyword: $namespace") + + elasticsearch.withTimeField('timestamp'), + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("4") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto'), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + 
elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: "+ metric +"-Infra AND labels.node.keyword: $infra") + + elasticsearch.withTimeField('timestamp'), + ], + }, + infraCPU: { + query(): + elasticsearch.withAlias("{{labels.mode.keyword}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.mode.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withSettings({ + "script": { + "inline": "_value*100" + } + }) + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: \"nodeCPU-Infra\" AND labels.instance.keyword: $infra") + + elasticsearch.withTimeField('timestamp') + }, + infraMemory: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + 
elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("available", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryAvailable-Infra\" AND labels.instance.keyword: $infra"), + self.base("Total", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryTotal-Infra\" AND labels.instance.keyword: $infra"), + ], + }, + aggWorkerNodeCpuUsage: { + query(): + elasticsearch.withAlias("{{labels.mode.keyword}}") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.mode.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + 
elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withSettings({ + "script": { + "inline": "_value*100" + } + }) + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: \"nodeCPU-AggregatedWorkers\"") + + elasticsearch.withTimeField('timestamp') + }, + aggWorkerNodeMemory: { + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('30s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(): [ + self.base("Available", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryAvailable-AggregatedWorkers\""), + self.base("Total", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryTotal-AggregatedWorkers\""), + ], + }, + aggWorkerNodeContainerCpuUsage: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + 
elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1'), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: containerCPU-AggregatedWorkers") + + elasticsearch.withTimeField('timestamp') + }, + aggWorkerNodeContainerMemoryUsage: { + query(): + elasticsearch.withAlias("") + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField("labels.pod.keyword") + + elasticsearch.bucketAggs.Terms.withId("3") + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + + elasticsearch.bucketAggs.Terms.settings.withSize("10"), + elasticsearch.bucketAggs.DateHistogram.withField("timestamp") + + elasticsearch.bucketAggs.DateHistogram.withId("2") + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto'), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Average.withField("value") + + 
elasticsearch.metrics.MetricAggregationWithSettings.Average.withId("1") + + elasticsearch.metrics.MetricAggregationWithSettings.Average.withType('avg'), + ]) + + elasticsearch.withQuery("uuid.keyword: $uuid AND metricName.keyword: containerMemory-AggregatedWorkers") + + elasticsearch.withTimeField('timestamp') + }, +} \ No newline at end of file diff --git a/assets/kube-burner-report-ocp-wrapper/variables.libsonnet b/assets/kube-burner-report-ocp-wrapper/variables.libsonnet new file mode 100644 index 0000000..2e5ba14 --- /dev/null +++ b/assets/kube-burner-report-ocp-wrapper/variables.libsonnet @@ -0,0 +1,77 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local var = g.dashboard.variable; + +{ + Datasource: + var.datasource.new('Datasource', 'elasticsearch') + + var.datasource.withRegex('/.*kube-burner.*/') + + var.query.generalOptions.withLabel('Datasource'), + + platform: + var.query.new('platform', "{\"find\": \"terms\", \"field\": \"metadata.platform.keyword\"}") + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti() + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('Platform'), + + sdn: + var.query.new('sdn', "{\"find\": \"terms\", \"field\": \"metadata.sdnType.keyword\", \"query\": \"metadata.platform.keyword: $platform\"}") + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.withRefresh(1) + + var.query.selectionOptions.withMulti() + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('SDN type'), + + workload: + var.query.new('workload', "{\"find\": \"terms\", \"field\": \"jobConfig.name.keyword\", \"query\": \"metadata.platform.keyword: $platform AND metadata.sdnType.keyword: $sdn\"}") + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.withRefresh(1) + + var.query.selectionOptions.withMulti(false) + + 
var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('Workload'), + + nodes: + var.query.new('nodes', "{\"find\": \"terms\", \"field\": \"metadata.totalNodes\", \"query\": \"metadata.platform.keyword: $platform AND metadata.sdnType.keyword: $sdn AND jobConfig.name.keyword: $workload\"}") + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.withRefresh(1) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('nodes'), + + uuid: + var.query.new('uuid', "{\"find\": \"terms\", \"field\": \"uuid.keyword\", \"query\": \"metadata.platform.keyword: $platform AND metadata.sdnType.keyword: $sdn AND jobConfig.name.keyword: $workload AND metadata.totalNodes: $nodes\"}") + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('UUID'), + + master: + var.query.new('master', "{ \"find\" : \"terms\", \"field\": \"labels.node.keyword\", \"query\": \"metricName.keyword: nodeRoles AND labels.role.keyword: master AND uuid.keyword: $uuid\"}") + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti(true) + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('Master nodes'), + + worker: + var.query.new('worker', "{ \"find\" : \"terms\", \"field\": \"labels.node.keyword\", \"query\": \"metricName.keyword: nodeRoles AND labels.role.keyword: worker AND uuid.keyword: $uuid\"}") + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti(true) + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('Worker nodes'), + + infra: + var.query.new('infra', "{ \"find\" : \"terms\", 
\"field\": \"labels.node.keyword\", \"query\": \"metricName.keyword: nodeRoles AND labels.role.keyword: infra AND uuid.keyword: $uuid\"}") + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti(true) + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('Infra nodes'), + + latencyPercentile: + var.custom.new('latencyPercentile', ['P99', 'P95', 'P50'],) + + var.custom.generalOptions.withLabel('Latency percentile'), +} \ No newline at end of file diff --git a/dittybopper/deploy.sh b/dittybopper/deploy.sh index c90adb4..4ec6d73 100755 --- a/dittybopper/deploy.sh +++ b/dittybopper/deploy.sh @@ -41,7 +41,7 @@ END export PROMETHEUS_USER=internal export GRAFANA_ADMIN_PASSWORD=admin export GRAFANA_URL="http://admin:${GRAFANA_ADMIN_PASSWORD}@localhost:3000" -export SYNCER_IMAGE=${SYNCER_IMAGE:-"quay.io/cloud-bulldozer/syncer:latest"} # Syncer image +export SYNCER_IMAGE=${SYNCER_IMAGE:-"quay.io/cloud-bulldozer/dittybopper-syncer:latest"} # Syncer image export GRAFANA_IMAGE=${GRAFANA_IMAGE:-"quay.io/cloud-bulldozer/grafana:9.4.3"} # Syncer image # Set defaults for command options diff --git a/templates/CPT/k8s-perf.jsonnet b/templates/CPT/k8s-perf.jsonnet deleted file mode 100644 index 7308819..0000000 --- a/templates/CPT/k8s-perf.jsonnet +++ /dev/null @@ -1,499 +0,0 @@ -local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; -local prometheus = grafana.prometheus; - - -// Helper functions - -local genericGraphPanel(title, format) = grafana.graphPanel.new( - title=title, - datasource='$datasource', - format=format, - nullPointMode='null as zero', - sort='decreasing', - legend_alignAsTable=true, -); - -local genericGraphLegendPanel(title, format) = grafana.graphPanel.new( - title=title, - datasource='$datasource', - format=format, - legend_values=true, - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_hideEmpty=true, - 
legend_hideZero=true, - legend_sort='max', - nullPointMode='null as zero', - sort='decreasing', -); - - -local nodeMemory(nodeName) = genericGraphLegendPanel('System Memory: ' + nodeName, 'bytes').addTarget( - prometheus.target( - 'node_memory_Active_bytes{node=~"' + nodeName + '"}', - legendFormat='Active', - ) -).addTarget( - prometheus.target( - 'node_memory_MemTotal_bytes{node=~"' + nodeName + '"}', - legendFormat='Total', - ) -).addTarget( - prometheus.target( - 'node_memory_Cached_bytes{node=~"' + nodeName + '"} + node_memory_Buffers_bytes{node=~"' + nodeName + '"}', - legendFormat='Cached + Buffers', - ) -).addTarget( - prometheus.target( - 'node_memory_MemAvailable_bytes{node=~"' + nodeName + '"}', - legendFormat='Available', - ) -); - - -local nodeCPU(nodeName) = genericGraphLegendPanel('CPU Basic: ' + nodeName, 'percent').addTarget( - prometheus.target( - 'sum by (instance, mode)(rate(node_cpu_seconds_total{node=~"' + nodeName + '",job=~".*"}[$interval])) * 100', - legendFormat='Busy {{mode}}', - ) -); - - -local diskThroughput(nodeName) = genericGraphLegendPanel('Disk throughput: ' + nodeName, 'Bps').addTarget( - prometheus.target( - 'rate(node_disk_read_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', - legendFormat='{{ device }} - read', - ) -).addTarget( - prometheus.target( - 'rate(node_disk_written_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', - legendFormat='{{ device }} - write', - ) -); - -local diskIOPS(nodeName) = genericGraphLegendPanel('Disk IOPS: ' + nodeName, 'iops').addTarget( - prometheus.target( - 'rate(node_disk_reads_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', - legendFormat='{{ device }} - read', - ) -).addTarget( - prometheus.target( - 'rate(node_disk_writes_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', - legendFormat='{{ device }} - write', - ) -); - -local networkUtilization(nodeName) = 
genericGraphLegendPanel('Network Utilization: ' + nodeName, 'bps').addTarget( - prometheus.target( - 'rate(node_network_receive_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', - legendFormat='{{instance}} - {{device}} - RX', - ) -).addTarget( - prometheus.target( - 'rate(node_network_transmit_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', - legendFormat='{{instance}} - {{device}} - TX', - ) -); - -local networkPackets(nodeName) = genericGraphLegendPanel('Network Packets: ' + nodeName, 'pps').addTarget( - prometheus.target( - 'rate(node_network_receive_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])', - legendFormat='{{instance}} - {{device}} - RX', - ) -).addTarget( - prometheus.target( - 'rate(node_network_transmit_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])', - legendFormat='{{instance}} - {{device}} - TX', - ) -); - -local networkDrop(nodeName) = genericGraphLegendPanel('Network packets drop: ' + nodeName, 'pps').addTarget( - prometheus.target( - 'topk(10, rate(node_network_receive_drop_total{node=~"' + nodeName + '"}[$interval]))', - legendFormat='rx-drop-{{ device }}', - ) -).addTarget( - prometheus.target( - 'topk(10,rate(node_network_transmit_drop_total{node=~"' + nodeName + '"}[$interval]))', - legendFormat='tx-drop-{{ device }}', - ) -); - -local conntrackStats(nodeName) = genericGraphLegendPanel('Conntrack stats: ' + nodeName, '') - { - seriesOverrides: [{ - alias: 'conntrack_limit', - yaxis: 2, - }], - yaxes: [{ show: true }, { show: true }], -} - .addTarget( - prometheus.target( - 'node_nf_conntrack_entries{node=~"' + nodeName + '"}', - legendFormat='conntrack_entries', - ) -).addTarget( - prometheus.target( - 'node_nf_conntrack_entries_limit{node=~"' + nodeName + '"}', - legendFormat='conntrack_limit', - ) -); - -local top10ContainerCPU(nodeName) = genericGraphLegendPanel('Top 10 container CPU: ' + nodeName, 'percent').addTarget( - 
prometheus.target( - 'topk(10, sum(irate(container_cpu_usage_seconds_total{container!="POD",name!="",instance=~"' + nodeName + '",namespace!="",namespace=~"$namespace"}[$interval])) by (pod,container,namespace,name,service) * 100)', - legendFormat='{{ pod }}: {{ container }}', - ) -); - -local top10ContainerRSS(nodeName) = genericGraphLegendPanel('Top 10 container RSS: ' + nodeName, 'bytes').addTarget( - prometheus.target( - 'topk(10, container_memory_rss{container!="POD",name!="",instance=~"' + nodeName + '",namespace!="",namespace=~"$namespace"})', - legendFormat='{{ pod }}: {{ container }}', - ) -); - -local containerWriteBytes(nodeName) = genericGraphLegendPanel('Container fs write rate: ' + nodeName, 'Bps').addTarget( - prometheus.target( - 'sum(rate(container_fs_writes_bytes_total{device!~".+dm.+", node=~"' + nodeName + '", container!=""}[$interval])) by (device, container)', - legendFormat='{{ container }}: {{ device }}', - ) -); - -// Individual panel definitions - -// Monitoring Stack - -local promReplMemUsage = genericGraphLegendPanel('Prometheus Replica Memory usage', 'bytes').addTarget( - prometheus.target( - 'sum(container_memory_rss{pod="prometheus-k8s-1",namespace!="",name!="",container="prometheus"}) by (pod)', - legendFormat='{{pod}}', - ) -).addTarget( - prometheus.target( - 'sum(container_memory_rss{pod="prometheus-k8s-0",namespace!="",name!="",container="prometheus"}) by (pod)', - legendFormat='{{pod}}', - ) -); - -// Kubelet - -local kubeletCPU = genericGraphLegendPanel('Top 10 Kubelet CPU usage', 'percent').addTarget( - prometheus.target( - 'topk(10,rate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[$interval])*100)', - legendFormat='kubelet - {{node}}', - ) -); - -local crioCPU = genericGraphLegendPanel('Top 10 crio CPU usage', 'percent').addTarget( - prometheus.target( - 'topk(10,rate(process_cpu_seconds_total{service="kubelet",job="crio"}[$interval])*100)', - legendFormat='crio - {{node}}', - ) -); - -local kubeletMemory = 
genericGraphLegendPanel('Top 10 Kubelet memory usage', 'bytes').addTarget( - prometheus.target( - 'topk(10,process_resident_memory_bytes{service="kubelet",job="kubelet"})', - legendFormat='kubelet - {{node}}', - ) -); - -local crioMemory = genericGraphLegendPanel('Top 10 crio memory usage', 'bytes').addTarget( - prometheus.target( - 'topk(10,process_resident_memory_bytes{service="kubelet",job="crio"})', - legendFormat='crio - {{node}}', - ) -); - -// Cluster details - -local current_node_count = grafana.statPanel.new( - title='Current Node Count', - datasource='$datasource', - reducerFunction='last', -).addTarget( - prometheus.target( - 'sum(kube_node_info{})', - legendFormat='Number of nodes', - ) -).addTarget( - prometheus.target( - 'sum(kube_node_status_condition{status="true"}) by (condition) > 0', - legendFormat='Node: {{ condition }}', - ) -); - -local current_namespace_count = grafana.statPanel.new( - title='Current namespace Count', - datasource='$datasource', - reducerFunction='last', -).addTarget( - prometheus.target( - 'sum(kube_namespace_status_phase) by (phase)', - legendFormat='{{ phase }}', - ) -); - -local current_pod_count = grafana.statPanel.new( - title='Current Pod Count', - reducerFunction='last', - datasource='$datasource', -).addTarget( - prometheus.target( - 'sum(kube_pod_status_phase{}) by (phase) > 0', - legendFormat='{{ phase}} Pods', - ) -); - -local nodeCount = genericGraphPanel('Number of nodes', 'none').addTarget( - prometheus.target( - 'sum(kube_node_info{})', - legendFormat='Number of nodes', - ) -).addTarget( - prometheus.target( - 'sum(kube_node_status_condition{status="true"}) by (condition) > 0', - legendFormat='Node: {{ condition }}', - ) -); - -local nsCount = genericGraphPanel('Namespace count', 'none').addTarget( - prometheus.target( - 'sum(kube_namespace_status_phase) by (phase) > 0', - legendFormat='{{ phase }} namespaces', - ) -); - -local podCount = genericGraphPanel('Pod count', 'none').addTarget( - prometheus.target( - 
'sum(kube_pod_status_phase{}) by (phase)', - legendFormat='{{phase}} pods', - ) -); - -local secretCmCount = genericGraphPanel('Secret & configmap count', 'none').addTarget( - prometheus.target( - 'count(kube_secret_info{})', - legendFormat='secrets', - ) -).addTarget( - prometheus.target( - 'count(kube_configmap_info{})', - legendFormat='Configmaps', - ) -); - -local deployCount = genericGraphPanel('Deployment count', 'none').addTarget( - prometheus.target( - 'count(kube_deployment_labels{})', - legendFormat='Deployments', - ) -); - - -local servicesCount = genericGraphPanel('Services count', 'none').addTarget( - prometheus.target( - 'count(kube_service_info{})', - legendFormat='Services', - ) -); - -local alerts = genericGraphPanel('Alerts', 'none').addTarget( - prometheus.target( - 'topk(10,sum(ALERTS{severity!="none"}) by (alertname, severity))', - legendFormat='{{severity}}: {{alertname}}', - ) -); - -local top10ContMem = genericGraphLegendPanel('Top 10 container RSS', 'bytes').addTarget( - prometheus.target( - 'topk(10, container_memory_rss{namespace!="",container!="POD",name!=""})', - legendFormat='{{ namespace }} - {{ name }}', - ) -); - -local podDistribution = genericGraphLegendPanel('Pod Distribution', 'none').addTarget( - prometheus.target( - 'count(kube_pod_info{}) by (exported_node)', - legendFormat='{{ node }}', - ) -); - -local top10ContCPU = genericGraphLegendPanel('Top 10 container CPU', 'percent').addTarget( - prometheus.target( - 'topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[$interval])*100)', - legendFormat='{{ namespace }} - {{ name }}', - ) -); - - -local goroutines_count = genericGraphPanel('Goroutines count', 'none').addTarget( - prometheus.target( - 'topk(10, sum(go_goroutines{}) by (job,instance))', - legendFormat='{{ job }} - {{ instance }}', - ) -); - -// Cluster operators - -local clusterOperatorsOverview = grafana.statPanel.new( - datasource='$datasource', - title='Cluster operators 
overview', -).addTarget( - prometheus.target( - 'sum by (condition)(cluster_operator_conditions{condition!=""})', - legendFormat='{{ condition }}', - ) -); - -local clusterOperatorsInformation = genericGraphLegendPanel('Cluster operators information', 'none').addTarget( - prometheus.target( - 'cluster_operator_conditions{name!="",reason!=""}', - legendFormat='{{name}} - {{reason}}', - ) -); - -local clusterOperatorsDegraded = genericGraphLegendPanel('Cluster operators degraded', 'none').addTarget( - prometheus.target( - 'cluster_operator_conditions{condition="Degraded",name!="",reason!=""}', - legendFormat='{{name}} - {{reason}}', - ) -); - - -// Dashboard - -grafana.dashboard.new( - 'k8s Performance', - description='Performance dashboard for Red Hat k8s', - time_from='now-1h', - timezone='utc', - refresh='30s', - editable='true', -) - - -// Templates - -.addTemplate( - grafana.template.datasource( - 'datasource', - 'prometheus', - '', - ) -) - -.addTemplate( - grafana.template.new( - '_worker_node', - '$datasource', - 'label_values(kube_node_labels{}, exported_node)', - '', - refresh=2, - ) { - label: 'Worker', - type: 'query', - multi: true, - includeAll: false, - }, -) - -.addTemplate( - grafana.template.new( - 'namespace', - '$datasource', - 'label_values(kube_pod_info, exported_namespace)', - '', - refresh=2, - ) { - label: 'Namespace', - type: 'query', - multi: false, - includeAll: true, - }, -) - - -.addTemplate( - grafana.template.new( - 'block_device', - '$datasource', - 'label_values(node_disk_written_bytes_total,device)', - '', - regex='/^(?:(?!dm|rb).)*$/', - refresh=2, - ) { - label: 'Block device', - type: 'query', - multi: true, - includeAll: true, - }, -) - - -.addTemplate( - grafana.template.new( - 'net_device', - '$datasource', - 'label_values(node_network_receive_bytes_total,device)', - '', - regex='/^((br|en|et).*)$/', - refresh=2, - ) { - label: 'Network device', - type: 'query', - multi: true, - includeAll: true, - }, -) - -.addTemplate( - 
grafana.template.new( - 'interval', - '$datasource', - '$__auto_interval_period', - label='interval', - refresh='time', - ) { - type: 'interval', - query: '2m,3m,4m,5m', - auto: false, - }, -) - -// Dashboard definition - -.addPanel(grafana.row.new(title='Cluster Details', collapse=true).addPanels( - [ - current_node_count { gridPos: { x: 0, y: 4, w: 8, h: 3 } }, - current_namespace_count { gridPos: { x: 8, y: 4, w: 8, h: 3 } }, - current_pod_count { gridPos: { x: 16, y: 4, w: 8, h: 3 } }, - nodeCount { gridPos: { x: 0, y: 12, w: 8, h: 8 } }, - nsCount { gridPos: { x: 8, y: 12, w: 8, h: 8 } }, - podCount { gridPos: { x: 16, y: 12, w: 8, h: 8 } }, - secretCmCount { gridPos: { x: 0, y: 20, w: 8, h: 8 } }, - deployCount { gridPos: { x: 8, y: 20, w: 8, h: 8 } }, - servicesCount { gridPos: { x: 16, y: 20, w: 8, h: 8 } }, - top10ContMem { gridPos: { x: 0, y: 28, w: 24, h: 8 } }, - top10ContCPU { gridPos: { x: 0, y: 36, w: 12, h: 8 } }, - goroutines_count { gridPos: { x: 12, y: 36, w: 12, h: 8 } }, - podDistribution { gridPos: { x: 0, y: 44, w: 24, h: 8 } }, - ] -), { gridPos: { x: 0, y: 3, w: 24, h: 1 } }) - -.addPanel(grafana.row.new(title='Node: $_worker_node', collapse=true, repeat='_worker_node').addPanels( - [ - nodeCPU('$_worker_node') { gridPos: { x: 0, y: 0, w: 12, h: 8 } }, - nodeMemory('$_worker_node') { gridPos: { x: 12, y: 0, w: 12, h: 8 } }, - diskThroughput('$_worker_node') { gridPos: { x: 0, y: 8, w: 12, h: 8 } }, - diskIOPS('$_worker_node') { gridPos: { x: 12, y: 8, w: 12, h: 8 } }, - networkUtilization('$_worker_node') { gridPos: { x: 0, y: 16, w: 12, h: 8 } }, - networkPackets('$_worker_node') { gridPos: { x: 12, y: 16, w: 12, h: 8 } }, - networkDrop('$_worker_node') { gridPos: { x: 0, y: 24, w: 12, h: 8 } }, - conntrackStats('$_worker_node') { gridPos: { x: 12, y: 24, w: 12, h: 8 } }, - top10ContainerCPU('$_worker_node') { gridPos: { x: 0, y: 32, w: 12, h: 8 } }, - top10ContainerRSS('$_worker_node') { gridPos: { x: 12, y: 32, w: 12, h: 8 } }, - ], -), 
{ gridPos: { x: 0, y: 1, w: 0, h: 8 } }) diff --git a/templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet b/templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet new file mode 100644 index 0000000..179df36 --- /dev/null +++ b/templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet @@ -0,0 +1,149 @@ +local panels = import '../../assets/kube-burner-report-ocp-wrapper/panels.libsonnet'; +local queries = import '../../assets/kube-burner-report-ocp-wrapper/queries.libsonnet'; +local variables = import '../../assets/kube-burner-report-ocp-wrapper/variables.libsonnet'; +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +g.dashboard.new('Kube-burner Report - OCP wrapper') ++ g.dashboard.withDescription(||| + Dashboard for kube-burner OCP wrapper +|||) ++ g.dashboard.withTags('kube-burner') ++ g.dashboard.time.withFrom('now-12h') ++ g.dashboard.time.withTo('now') ++ g.dashboard.withTimezone('utc') ++ g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d']) ++ g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d']) ++ g.dashboard.withRefresh('') ++ g.dashboard.withEditable(true) ++ g.dashboard.graphTooltip.withSharedCrosshair() ++ g.dashboard.withVariables([ + variables.Datasource, + variables.platform, + variables.sdn, + variables.workload, + variables.nodes, + variables.uuid, + variables.master, + variables.worker, + variables.infra, + variables.latencyPercentile, +]) ++ g.dashboard.withPanels([ + panels.stat.withLastNotNullCalcs('Node count', 'none', queries.nodeCount.query(), { x: 0, y: 0, w: 4, h: 3 }), + panels.stat.withLastNotNullCalcs('', '', queries.aggregatesCount.queries(), { x: 4, y: 0, w: 12, h: 3 }), + panels.stat.withFieldSummary('OpenShift version', '', '/^metadata\\.ocpVersion$/', queries.openshiftVersion.query(), { x: 16, y: 0, w: 6, h: 3 }), + panels.stat.withFieldSummary('Etcd version', '', 
'/^labels\\.cluster_version$/', queries.openshiftVersion.query(), { x: 22, y: 0, w: 2, h: 3 }),  // NOTE(review): Etcd panel reuses the openshiftVersion query; the replaced dashboard used a dedicated metricName "etcdVersion" query for labels.cluster_version — confirm this field exists in clusterVersion docs or add an etcdVersion query + panels.table.withJobSummary('', '', queries.jobSummary.query(), { x: 0, y: 3, w: 24, h: 3 }), + panels.table.withClusterMetadata('', '', queries.clusterMetadata.query(), { x: 0, y: 6, w: 24, h: 3 }), + panels.table.withAlerts('Alerts', '', queries.alerts.query(), { x: 0, y: 9, w: 24, h: 4 }), + g.panel.row.new('Cluster status') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.withMeanMax('Masters CPU utilization', 'percent', queries.mastersCPUUtilization.queries(), { x: 0, y: 14, w: 12, h: 9 }, -1), + panels.timeSeries.sortByMin('Masters Memory utilization', 'bytes', queries.mastersMemoryUtilization.queries(), { x: 12, y: 14, w: 12, h: 9 }, -1), + panels.timeSeries.sortMaxWithRightLegend('Node status summary', 'short', queries.nodeStatusSummary.query(), { x: 0, y: 23, w: 12, h: 8 }, null), + panels.timeSeries.maxWithBottomLegend('Pod status summary', 'none', queries.podStatusSummary.query(), { x: 12, y: 23, w: 12, h: 8 }, null), + panels.timeSeries.kupeApiCustomOverrides('Kube-apiserver usage', 'percent', queries.kubeApiServerUsage.queries(), { x: 0, y: 31, w: 12, h: 9 }, null), + panels.timeSeries.kupeApiAverageCustomOverrides('Average kube-apiserver usage', 'percent', queries.averageKubeApiServerUsage.queries(), { x: 12, y: 31, w: 12, h: 9 }, null), + panels.timeSeries.activeKubeControllerManagerOverrides('Active Kube-controller-manager usage', 'percent', queries.activeKubeControllerManagerUsage.queries(), { x: 0, y: 40, w: 12, h: 9 }, null), + panels.timeSeries.kubeSchedulerUsageOverrides('Kube-scheduler usage', 'percent', queries.kubeSchedulerUsage.queries(), { x: 12, y: 40, w: 12, h: 9 }, null), + ]), + g.panel.row.new('Pod latency stats') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + 
panels.timeSeries.sortByMeanCommon('Average pod latency', 'ms', queries.averagePodLatency.query(), { x: 0, y: 13, w: 12, h: 8 }, -1), + panels.stat.withMeanThresholds('Pod latencies summary $latencyPercentile', 'ms', queries.podLatenciesSummary.query(), { x: 12, y: 15, w: 12, h: 8 }), + panels.table.withLatencyTableOverrides('Pod conditions latency', 'ms', queries.podConditionsLatency.query(), { x: 0, y: 23, w: 24, h: 10 }), + panels.timeSeries.sortByMax('Top 10 Container runtime network setup latency', 'µs', queries.top10ContainerRuntimeNetworkSetupLatency.query(), { x: 0, y: 33, w: 12, h: 9 }, -1), + panels.timeSeries.withMeanMax('Scheduling throughput', 'reqps', queries.schedulingThroughput.query(), { x: 12, y: 33, w: 12, h: 9 }, -1), + ]), + g.panel.row.new('OVNKubernetes') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.sortByMean('ovnkube-master pods CPU usage', 'percent', queries.ovnKubeMasterPodStats.queries('containerCPU'), { x: 0, y: 16, w: 12, h: 9 }, null), + panels.timeSeries.sortByMax('ovnkube-master pods Memory usage', 'bytes', queries.ovnKubeMasterPodStats.queries('containerMemory'), { x: 12, y: 16, w: 12, h: 9 }, null), + panels.timeSeries.sortByMean('ovnkube-master CPU usage', 'percent', queries.ovnKubeMasterStats.queries('containerCPU'), { x: 0, y: 25, w: 12, h: 8 }, null), + panels.timeSeries.sortByMaxCommon('ovnkube-master Memory Usage', 'bytes', queries.ovnKubeMasterStats.queries('containerMemory'), { x: 12, y: 25, w: 12, h: 8 }, null), + panels.timeSeries.sortByMean('ovnkube-node pods CPU Usage', 'percent', queries.ovnKubeNodePodStats.queries('containerCPU'), { x: 0, y: 33, w: 12, h: 8 }, null), + panels.timeSeries.sortByMean('ovnkube-node pods Memory Usage', 'bytes', queries.ovnKubeNodePodStats.queries('containerMemory'), { x: 12, y: 33, w: 12, h: 8 }, null), + panels.timeSeries.sortByMax('ovn-controller CPU Usage', 'percent', 
queries.ovnControllerStats.query('containerCPU'), { x: 0, y: 41, w: 12, h: 8 }, null), + panels.timeSeries.sortByMax('ovn-controller Memory Usage', 'bytes', queries.ovnControllerStats.query('containerMemory'), { x: 12, y: 41, w: 12, h: 8 }, null), + panels.timeSeries.withMeanMax('Aggregated OVNKube-master containers CPU', 'percent', queries.aggregatedOVNKubeMasterStats.queries('containerCPU'), { x: 0, y: 49, w: 12, h: 14 }, null), + panels.timeSeries.withMeanMax('Aggregated OVNKube-master containers memory', 'bytes', queries.aggregatedOVNKubeMasterStats.queries('containerMemory'), { x: 12, y: 49, w: 12, h: 14 }, null), + panels.timeSeries.withMeanMax('Aggregated OVNKube-node containers CPU', 'percent', queries.aggregatedOVNKubeNodeStats.query('containerCPU'), { x: 0, y: 63, w: 12, h: 14 }, null), + panels.timeSeries.sortByMeanCommon('Aggregated OVNKube-node containers Memory', 'bytes', queries.aggregatedOVNKubeNodeStats.query('containerMemory'), { x: 12, y: 63, w: 12, h: 14 }, null), + ]), + g.panel.row.new('etcd') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.etcd99thDiskWalLatencyOverrides('etcd 99th disk WAL fsync latency', 's', queries.etcd99thLatencies.query('99thEtcdDiskWalFsyncDurationSeconds'), { x: 0, y: 17, w: 12, h: 9 }, null), + panels.timeSeries.etcd99thCommitLatencyOverrides('etcd 99th disk backend commit latency', 's', queries.etcd99thLatencies.query('99thEtcdDiskBackendCommitDurationSeconds'), { x: 12, y: 17, w: 12, h: 9 }, null), + panels.timeSeries.base('Etcd leader changes', 'none', queries.etcdLeaderChanges.query(), { x: 0, y: 26, w: 12, h: 9 }, null), + panels.timeSeries.etcd99thNetworkPeerRTOverrides('Etcd 99th network peer roundtrip time', 's', queries.etcd99thNetworkPeerRT.query(), { x: 12, y: 26, w: 12, h: 9 }, null), + panels.timeSeries.etcdResouceUtilizationOverrides('Etcd resource utilization', 'percent', 
queries.etcdResourceUtilization.queries(), { x: 0, y: 35, w: 12, h: 9 }, null), + ]), + g.panel.row.new('API and Kubeproxy') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.readOnlyAPIRequestp99ResourceOverrides('Read Only API request P99 latency - resource scoped', 's', queries.readOnlyAPILatencyResource.query(), { x: 0, y: 18, w: 12, h: 8 }, -1), + panels.timeSeries.readOnlyAPIRequestp99NamespaceOverrides('Read Only API request P99 latency - namespace scoped', 's', queries.readOnlyAPILatencyNamespace.query(), { x: 12, y: 18, w: 12, h: 8 }, -1), + panels.timeSeries.readOnlyAPIRequestp99ClusterOverrides('Read Only API request P99 latency - cluster scoped', 's', queries.readOnlyAPILatencyCluster.query(), { x: 0, y: 26, w: 12, h: 8 }, -1), + panels.timeSeries.readOnlyAPIRequestp99MutatingOverrides('Mutating API request P99 latency', 's', queries.readOnlyAPILatencyMutating.query(), { x: 12, y: 26, w: 12, h: 8 }, -1), + panels.timeSeries.base('Service sync latency', 's', queries.serviceSyncLatency.query(), { x: 0, y: 34, w: 12, h: 10 }, null), + panels.timeSeries.sortByMax('API request rate', 'reqps', queries.apiRequestRate.query(), { x: 12, y: 34, w: 12, h: 10 }, -1), + ]), + g.panel.row.new('Cluster Kubelet & CRI-O') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.meanWithRightLegendCommons('Top 5 Kubelet process by CPU usage', 'percent', queries.top5KubeletProcessByCpuUsage.queries(), { x: 0, y: 19, w: 12, h: 8 }, null), + panels.timeSeries.meanWithRightLegendCommons('Top 5 CRI-O process by CPU usage', 'percent', queries.top5CrioProcessByCpuUsage.queries(), { x: 12, y: 19, w: 12, h: 8 }, null), + panels.timeSeries.maxMeanWithRightLegend('Top 5 Kubelet RSS by memory usage', 'bytes', queries.top5KubeletRSSByMemoryUsage.queries(), { x: 0, y: 27, w: 12, h: 8 }, -1), + 
panels.timeSeries.maxMeanWithRightLegend('Top 5 CRI-O RSS by memory usage', 'bytes', queries.top5CrioRSSByMemoryUsage.queries(), { x: 12, y: 27, w: 12, h: 8 }, null), + ]), + g.panel.row.new('Master: $master') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 0, h: 8 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withRepeat('master') + + g.panel.row.withPanels([ + panels.timeSeries.sortByMax('Container CPU usage $master', 'percent', queries.mastersContainerStats.query('containerCPU'), { x: 0, y: 20, w: 12, h: 9 }, null), + panels.timeSeries.maxWithBottomLegend('Container RSS memory $master', 'bytes', queries.mastersContainerStats.query('containerMemory'), { x: 12, y: 20, w: 12, h: 9 }, null), + panels.timeSeries.withCommonAggregationsRightPlacement('CPU $master', 'percent', queries.masterCPU.query(), { x: 0, y: 29, w: 12, h: 9 }, null), + panels.timeSeries.allWithRightLegend('Memory $master', 'bytes', queries.masterMemory.queries(), { x: 12, y: 29, w: 12, h: 9 }, null), + ]), + g.panel.row.new('Worker: $worker') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 0, h: 8 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withRepeat('worker') + + g.panel.row.withPanels([ + panels.timeSeries.sortByMax('Container CPU usage $worker', 'percent', queries.workersContainerStats.query('containerCPU'), { x: 0, y: 21, w: 12, h: 9 }, null), + panels.timeSeries.withMeanMax('Container RSS memory $worker', 'bytes', queries.workersContainerStats.query('containerMemory'), { x: 12, y: 21, w: 12, h: 9 }, null), + panels.timeSeries.workerCPUCustomOverrides('CPU $worker', 'percent', queries.workerCPU.query(), { x: 0, y: 30, w: 12, h: 8 }, null), + panels.timeSeries.maxWithRightLegend('Memory $worker', 'bytes', queries.workerMemory.queries(), { x: 12, y: 30, w: 12, h: 8 }, null), + ]), + g.panel.row.new('Infra: $infra') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 0, h: 8 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withRepeat('infra') + + g.panel.row.withPanels([ + 
panels.timeSeries.sortByMean('Container CPU usage $infra', 'percent', queries.infraContainerStats.queries('containerCPU'), { x: 0, y: 31, w: 12, h: 9 }, null), + panels.timeSeries.sortByMax('Container RSS memory $infra', 'bytes', queries.infraContainerStats.queries('containerMemory'), { x: 12, y: 31, w: 12, h: 9 }, null), + panels.timeSeries.meanWithRightLegend('CPU $infra', 'percent', queries.infraCPU.query(), { x: 0, y: 31, w: 12, h: 9 }, null), + panels.timeSeries.minMaxWithRightLegend('Memory $infra', 'bytes', queries.infraMemory.queries(), { x: 12, y: 31, w: 12, h: 9 }, null), + ]), + g.panel.row.new('Aggregated worker nodes usage (only in aggregated metrics profile)') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 0, h: 8 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withRepeat('_infra_node')  // FIXME(review): '_infra_node' is not defined in variables.libsonnet nor in withVariables() — this repeat is a dead copy-paste; remove it or define the variable + + g.panel.row.withPanels([ + panels.timeSeries.meanWithRightLegend('Avg CPU usage', 'percent', queries.aggWorkerNodeCpuUsage.query(), { x: 0, y: 23, w: 12, h: 9 }, -1), + panels.timeSeries.maxWithRightLegend('Avg Memory', 'bytes', queries.aggWorkerNodeMemory.queries(), { x: 12, y: 23, w: 12, h: 9 }, null), + panels.timeSeries.sortByMax('container CPU usage', 'percent', queries.aggWorkerNodeContainerCpuUsage.query(), { x: 0, y: 32, w: 12, h: 9 }, -1), + panels.timeSeries.sortByMax('Container memory RSS', 'bytes', queries.aggWorkerNodeContainerMemoryUsage.query(), { x: 12, y: 32, w: 12, h: 9 }, null), + ]), +]) diff --git a/templates/CPT/kube-burner.jsonnet b/templates/CPT/kube-burner.jsonnet deleted file mode 100644 index cdb5160..0000000 --- a/templates/CPT/kube-burner.jsonnet +++ /dev/null @@ -1,4568 +0,0 @@ -local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; -local es = grafana.elasticsearch; - -local worker_count = grafana.statPanel.new( - title='Node count', - datasource='$datasource1', - justifyMode='center' -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "nodeRoles"', - timeField='timestamp', - metrics=[{ - 
field: 'coun', - id: '1', - meta: {}, - settings: {}, - type: 'count', - }], - bucketAggs=[ - { - field: 'labels.role.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -).addThresholds([ - { color: 'green', value: null }, - { color: 'red', value: 80 }, -]); - - -local metric_count_panel = grafana.statPanel.new( - datasource='$datasource1', - justifyMode='center', - title=null -).addTarget( - // Namespaces count - es.target( - query='uuid.keyword: $uuid AND metricName: "namespaceCount" AND labels.phase: "Active"', - alias='Namespaces', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - // Services count - es.target( - query='uuid.keyword: $uuid AND metricName: "serviceCount"', - alias='Services', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - // Deployments count - es.target( - query='uuid.keyword: $uuid AND metricName: "deploymentCount"', - alias='Services', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - // Secrets count - es.target( - query='uuid.keyword: $uuid AND 
metricName: "secretCount"', - alias='Services', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - // ConfigMap count - es.target( - query='uuid.keyword: $uuid AND metricName: "configmapCount"', - alias='ConfigMaps', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addThresholds([ - { color: 'green', value: null }, - { color: 'red', value: 80 }, -]); - -local openshift_version_panel = grafana.statPanel.new( - title='OpenShift version', - datasource='$datasource1', - justifyMode='center', - reducerFunction='lastNotNull', - fields='/^labels\\.version$/' -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "clusterVersion"', - timeField='timestamp', - metrics=[{ - id: '1', - settings: { - size: '500', - }, - type: 'raw_data', - }], - ) -); - -local etcd_version_panel = grafana.statPanel.new( - title='Etcd version', - datasource='$datasource1', - justifyMode='center', - reducerFunction='lastNotNull', - fields='labels.cluster_version' -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "etcdVersion"', - timeField='timestamp', - metrics=[{ - id: '1', - settings: { - size: '500', - }, - type: 'raw_data', - }], - ) -); - - -// Next line -// TODO: Convert to new table format once jsonnet supports it. -// That would fix the text wrapping problem. 
-local summary_panel_1 = grafana.tablePanel.new( - datasource='$datasource1', - title=null, - styles=[ - { - pattern: 'uuid', - alias: 'UUID', - type: 'string', - }, - { - pattern: 'jobConfig.name', - alias: 'Name', - type: 'hidden', - }, - { - pattern: 'jobConfig.qps', - alias: 'QPS', - type: 'number', - }, - { - pattern: 'jobConfig.burst', - alias: 'Burst', - type: 'number', - }, - { - pattern: 'elapsedTime', - alias: 'Elapsed time', - type: 'number', - unit: 's', - }, - { - pattern: 'jobConfig.jobIterations', - alias: 'Iterations', - type: 'number', - }, - { - pattern: 'jobConfig.jobType', - alias: 'Job Type', - type: 'string', - }, - { - pattern: 'jobConfig.podWait', - alias: 'podWait', - type: 'hidden', - }, - { - pattern: 'jobConfig.namespacedIterations', - alias: 'Namespaced iterations', - type: 'hidden', - }, - { - pattern: 'jobConfig.preLoadImages', - alias: 'Preload Images', - type: 'boolean', - }, - { - pattern: '_id', - alias: '_id', - type: 'hidden', - }, - { - pattern: '_index', - alias: '_index', - type: 'hidden', - }, - { - pattern: '_type', - alias: '_type', - type: 'hidden', - }, - { - pattern: 'highlight', - alias: 'highlight', - type: 'hidden', - }, - { - pattern: '_type', - alias: '_type', - type: 'hidden', - }, - { - pattern: 'jobConfig.cleanup', - type: 'hidden', - }, - { - pattern: 'jobConfig.errorOnVerify', - alias: 'errorOnVerify', - type: 'hidden', - }, - { - pattern: 'jobConfig.jobIterationDelay', - alias: 'jobIterationDelay', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.jobPause', - alias: 'jobPause', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.maxWaitTimeout', - alias: 'maxWaitTimeout', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.namespace', - alias: 'namespacePrefix', - type: 'hidden', - }, - { - pattern: 'jobConfig.namespaced', - alias: 'jobConfig.namespaced', - type: 'hidden', - }, - { - pattern: 'jobConfig.objects', - alias: 'jobConfig.objects', - type: 'hidden', - }, - { - 
pattern: 'jobConfig.preLoadPeriod', - alias: 'jobConfig.preLoadPeriod', - type: 'hidden', - }, - { - pattern: 'jobConfig.verifyObjects', - alias: 'jobConfig.verifyObjects', - type: 'hidden', - }, - { - pattern: 'metricName', - alias: 'metricName', - type: 'hidden', - }, - { - pattern: 'timestamp', - alias: 'timestamp', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitFor', - alias: 'jobConfig.waitFor', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitForDeletion', - alias: 'jobConfig.waitForDeletion', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitWhenFinished', - alias: 'jobConfig.waitWhenFinished', - type: 'hidden', - }, - { - pattern: 'sort', - alias: 'sort', - type: 'hidden', - }, - ] -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "jobSummary"', - timeField='timestamp', - metrics=[{ - id: '1', - settings: { - size: '500', - }, - type: 'raw_data', - }], - ) -).addTransformation( - grafana.transformation.new('organize', options={ - indexByName: { - _id: 1, - _index: 2, - _type: 3, - elapsedTime: 8, - 'jobConfig.burst': 7, - 'jobConfig.cleanup': 12, - 'jobConfig.errorOnVerify': 13, - 'jobConfig.jobIterationDelay': 14, - 'jobConfig.jobIterations': 9, - 'jobConfig.jobPause': 15, - 'jobConfig.jobType': 10, - 'jobConfig.maxWaitTimeout': 16, - 'jobConfig.name': 5, - 'jobConfig.namespace': 17, - 'jobConfig.namespacedIterations': 18, - 'jobConfig.objects': 19, - 'jobConfig.podWait': 11, - 'jobConfig.qps': 6, - 'jobConfig.verifyObjects': 20, - 'jobConfig.waitFor': 21, - 'jobConfig.waitForDeletion': 22, - 'jobConfig.waitWhenFinished': 23, - metricName: 24, - timestamp: 0, - uuid: 4, - }, - }) -); - - -// TODO: Convert to new table format once jsonnet supports it. -// That would fix the text wrapping problem. 
-local summary_panel_2 = grafana.tablePanel.new( - datasource='$datasource1', - title=null, - styles=[ - { - pattern: 'k8s_version', - alias: 'k8s version', - type: 'string', - }, - { - pattern: 'result', - alias: 'Result', - type: 'string', - }, - { - pattern: 'sdn_type', - alias: 'SDN', - type: 'string', - }, - { - pattern: 'total_nodes', - alias: 'Total nodes', - type: 'number', - }, - { - pattern: 'master_nodes_count', - alias: 'Master nodes', - type: 'number', - }, - { - pattern: 'worker_nodes_count', - alias: 'Worker nodes', - type: 'number', - }, - { - pattern: 'infra_nodes_count', - alias: 'Infra nodes', - type: 'number', - }, - { - pattern: 'master_nodes_type', - alias: 'Masters flavor', - type: 'string', - }, - { - pattern: '_id', - alias: '_id', - type: 'hidden', - }, - { - pattern: '_index', - alias: '_index', - type: 'hidden', - }, - { - pattern: '_type', - alias: '_type', - type: 'hidden', - }, - { - pattern: 'benchmark', - alias: 'benchmark', - type: 'hidden', - }, - { - pattern: 'clustertype', - alias: 'clustertype', - type: 'hidden', - }, - { - pattern: 'end_date', - alias: 'end_date', - type: 'hidden', - }, - { - pattern: 'highlight', - alias: 'highlight', - type: 'hidden', - }, - { - pattern: 'jobConfig.cleanup', - alias: 'jobConfig.cleanup', - type: 'hidden', - }, - { - pattern: 'jobConfig.errorOnVerify', - alias: 'errorOnVerify', - type: 'hidden', - }, - { - pattern: 'jobConfig.jobIterationDelay', - alias: 'jobIterationDelay', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.jobPause', - alias: 'jobPause', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.maxWaitTimeout', - alias: 'maxWaitTimeout', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.namespace', - alias: 'namespacePrefix', - type: 'hidden', - }, - { - pattern: 'jobConfig.namespaced', - alias: 'jobConfig.namespaced', - type: 'hidden', - }, - { - pattern: 'jobConfig.objects', - alias: 'jobConfig.objects', - type: 'hidden', - }, - { - pattern: 
'jobConfig.preLoadPeriod', - alias: 'jobConfig.preLoadPeriod', - type: 'hidden', - }, - { - pattern: 'jobConfig.verifyObjects', - alias: 'jobConfig.verifyObjects', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitFor', - alias: 'jobConfig.waitFor', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitForDeletion', - alias: 'jobConfig.waitForDeletion', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitWhenFinished', - alias: 'jobConfig.waitWhenFinished', - type: 'hidden', - }, - { - pattern: 'metricName', - alias: 'metricName', - type: 'hidden', - }, - { - pattern: 'ocp_version', - alias: 'ocp_version', - type: 'hidden', - }, - { - pattern: 'ocp_version', - alias: 'ocp_version', - type: 'hidden', - }, - { - pattern: 'sort', - alias: 'sort', - type: 'hidden', - }, - { - pattern: 'timestamp', - alias: 'timestamp', - type: 'hidden', - }, - { - pattern: 'uuid', - alias: 'uuid', - type: 'hidden', - }, - { - pattern: 'workload', - alias: 'workload', - type: 'hidden', - }, - { - pattern: 'worker_nodes_type', - alias: 'worker_nodes_type', - type: 'hidden', - }, - { - pattern: 'infra_nodes_type', - alias: 'infra_nodes_type', - type: 'hidden', - }, - { - pattern: 'platform', - alias: 'platform', - type: 'hidden', - }, - { - pattern: 'workload_nodes_count', - alias: 'workload_nodes_count', - type: 'hidden', - }, - { - pattern: 'workload_nodes_type', - alias: 'workload_nodes_type', - type: 'hidden', - }, - ] -).addTarget( - es.target( - query='uuid.keyword: $uuid AND result.keyword: *', - timeField='timestamp', - metrics=[{ - id: '1', - settings: { - size: '500', - }, - type: 'raw_data', - }], - ) -).addTransformation( - grafana.transformation.new('organize', options={ - indexByName: { - _id: 4, - _index: 5, - _type: 15, - benchmark: 17, - clustertype: 18, - end_date: 19, - highlight: 20, - infra_nodes_count: 9, - infra_nodes_type: 14, - k8s_version: 1, - master_nodes_count: 7, - master_nodes_type: 11, - ocp_version: 21, - platform: 22, - result: 2, - sdn_type: 3, - 
sort: 23, - timestamp: 0, - total_nodes: 6, - uuid: 16, - worker_nodes_count: 8, - worker_nodes_type: 12, - workload: 24, - workload_nodes_count: 10, - workload_nodes_type: 13, - }, - }) -); - -// First row: Cluster status -local masters_cpu = grafana.graphPanel.new( - title='Masters CPU utilization', - datasource='$datasource1', - legend_alignAsTable=true, - legend_avg=true, - legend_max=true, - percentage=true, - legend_values=true, - format='percent', -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Masters" AND NOT labels.mode.keyword: idle AND NOT labels.mode.keyword: steal', - timeField='timestamp', - alias='{{labels.instance.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: { - script: '_value * 100', - }, - type: 'sum', - }], - bucketAggs=[ - { - field: 'labels.instance.keyword', - fake: true, - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local masters_memory = grafana.graphPanel.new( - title='Masters Memory utilization', - datasource='$datasource1', - legend_alignAsTable=true, - legend_avg=true, - legend_max=true, - legend_values=true, - format='bytes' -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Masters"', - timeField='timestamp', - alias='Available {{labels.instance.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'sum', - }], - bucketAggs=[ - { - field: 'labels.instance.keyword', - fake: true, - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local 
node_status_summary = grafana.graphPanel.new( - title='Node Status Summary', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_current=true, - legend_values=true, - legend_rightSide=true, -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeStatus"', - timeField='timestamp', - alias='{{labels.condition.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.condition.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local pod_status_summary = grafana.graphPanel.new( - title='Pod Status Summary', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_current=true, - legend_values=true, -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "podStatusCount"', - timeField='timestamp', - alias='{{labels.phase.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.phase.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local kube_api_cpu = grafana.graphPanel.new( - title='Kube-apiserver CPU', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - 
alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - 
alias='Avg CPU {{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); -// TODO: When the feature is added to grafannet, style the average differently. - - -local kube_api_memory = grafana.graphPanel.new( - title='Kube-apiserver Memory', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - alias='Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - es.target( - query='uuid.keyword: $uuid AND 
metricName: "containerMemory-Masters" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - alias='Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - alias='Avg Rss {{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); -// TODO: When the feature is added to grafannet, style the average differently. 
- - -local active_controller_manager_cpu = grafana.graphPanel.new( - title='Active Kube-controller-manager CPU', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-controller-manager', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '1', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-controller-manager', - timeField='timestamp', - alias='{{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '1', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - 
min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local active_controller_manager_memory = grafana.graphPanel.new( - title='Active Kube-controller-manager memory', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-controller-manager', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '1', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory-Masters" AND labels.container.keyword: kube-controller-manager', - timeField='timestamp', - alias='{{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '1', - }, - type: 'terms', - }, - { - 
field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local kube_scheduler_cpu = grafana.graphPanel.new( - title='Kube-scheduler CPU', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-scheduler', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-scheduler', - timeField='timestamp', - alias='{{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - 
bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local kube_scheduler_memory = grafana.graphPanel.new( - title='Kube-scheduler memory', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-scheduler', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory-Masters" AND 
labels.container.keyword: kube-scheduler', - timeField='timestamp', - alias='Rss {{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local hypershift_controlplane_cpu = grafana.graphPanel.new( - title='Hypershift Controlplane CPU Usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU-Controlplane"', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '20', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '20', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '4', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - - -local hypershift_controlplane_memory = grafana.graphPanel.new( - title='Hypershift Controlplane RSS memory Usage', - datasource='$datasource1', - 
legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory-Controlplane"', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '20', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '20', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '4', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -// Pod latencies section -local average_pod_latency = grafana.graphPanel.new( - title='Average pod latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_min=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='ms', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement', - timeField='timestamp', - alias='{{field}}', - metrics=[ - { - field: 'podReadyLatency', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'schedulingLatency', - id: '3', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'initializedLatency', - id: '4', - meta: {}, - settings: {}, - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local pod_latencies_summary = grafana.statPanel.new( - datasource='$datasource1', - justifyMode='center', - title='Pod latencies summary $latencyPercentile', - unit='ms', - colorMode='value', // Note: There isn't currently a way to set the color palette. 
-).addTarget( - // Namespaces count - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: podLatencyQuantilesMeasurement', - alias='$latencyPercentile {{term quantileName.keyword}}', - timeField='timestamp', - metrics=[{ - field: '$latencyPercentile', - id: '1', - meta: {}, - settings: {}, - type: 'max', - }], - bucketAggs=[ - { - fake: true, - field: 'quantileName.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '0', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local pod_conditions_latency = grafana.tablePanel.new( - title='Pod conditions latency', - datasource='$datasource1', - transform='table', - styles=[ - { - pattern: 'Average containersReadyLatency', - alias: 'ContainersReady', - type: 'number', - unit: 'ms', - }, - { - pattern: 'Average initializedLatency', - alias: 'Initialized', - type: 'number', - unit: 'ms', - }, - { - pattern: 'Average podReadyLatency', - alias: 'Ready', - type: 'number', - unit: 'ms', - }, - { - pattern: 'Average schedulingLatency', - alias: 'Scheduling', - type: 'number', - unit: 'ms', - }, - { - pattern: 'namespace.keyword', - alias: 'Namespace', - type: 'string', - }, - { - pattern: 'podName.keyword', - alias: 'Pod', - type: 'string', - }, - { - pattern: 'nodeName.keyword', - alias: 'Node', - type: 'string', - }, - ], -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement', - timeField='timestamp', - metrics=[ - { - field: 'schedulingLatency', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'initializedLatency', - id: '3', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'containersReadyLatency', - id: '4', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'podReadyLatency', - id: '5', - meta: {}, - settings: {}, - type: 'avg', - }, - 
], - bucketAggs=[ - { - fake: true, - field: 'namespace.keyword', - id: '6', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '5', - size: '100', - }, - type: 'terms', - }, - { - fake: true, - field: 'nodeName.keyword', - id: '7', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '100', - }, - type: 'terms', - }, - { - field: 'podName.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '5', - size: '100', - }, - type: 'terms', - }, - ], - ) -); - -local setup_latency = grafana.graphPanel.new( - title='Top 10 Container runtime network setup latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='µs', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: containerNetworkSetupLatency', - timeField='timestamp', - alias='{{labels.node.keyword}}', - metrics=[ - { - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.node.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local scheduling_throughput = grafana.graphPanel.new( - title='Scheduling throughput', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='reqps', -) - .addTarget( - es.target( - query='uuid: $uuid AND metricName.keyword: schedulingThroughput', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - 
) -); - -// OVN section -local ovnkube_master_cpu = grafana.graphPanel.new( - title='ovnkube-master CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.pod.keyword: /ovnkube-master.*/', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '3', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - - -local ovnkube_master_memory = grafana.graphPanel.new( - title='ovnkube-master Memory usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.pod.keyword: /ovnkube-master.*/', - timeField='timestamp', - alias='{{labels.pod.keyword}}', - metrics=[ - { - field: 'value', - id: '1', - type: 'sum', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '3', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local ovnkube_controller_cpu = grafana.graphPanel.new( - title='ovn-controller CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, 
- legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.namespace.keyword: "openshift-ovn-kubernetes" AND labels.pod.keyword: /ovnkube-node.*/ AND labels.container.keyword: "ovn-controller"', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '3', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - - -local ovnkube_controller_memory = grafana.graphPanel.new( - title='ovn-controller Memory usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.namespace.keyword: "openshift-ovn-kubernetes" AND labels.pod.keyword: /ovnkube-node.*/ AND labels.container.keyword: "ovn-controller"', - timeField='timestamp', - alias='{{labels.pod.keyword}}', - metrics=[ - { - field: 'value', - id: '1', - type: 'sum', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '3', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - - -// ETCD section -local etcd_fsync_latency = grafana.graphPanel.new( - title='etcd 99th disk WAL fsync latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( 
- query='uuid.keyword: $uuid AND metricName: "99thEtcdDiskWalFsyncDurationSeconds"', - timeField='timestamp', - alias='{{labels.pod.keyword}}', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local etcd_commit_latency = grafana.graphPanel.new( - title='etcd 99th disk backend commit latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "99thEtcdDiskBackendCommitDurationSeconds"', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local etcd_leader_changes = grafana.graphPanel.new( - title='Etcd leader changes', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_values=true, - min=0, - format='s', -) - .addTarget( - es.target( - query='uuid: $uuid AND metricName.keyword: etcdLeaderChangesRate', - alias='Etcd leader changes', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '1', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local etcd_peer_roundtrip_time = 
grafana.graphPanel.new( - title='Etcd 99th network peer roundtrip time', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: 99thEtcdRoundTripTimeSeconds', - alias='{{labels.pod.keyword}} to {{labels.To.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - fake: true, - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.To.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local etcd_cpu = grafana.graphPanel.new( - title='Etcd CPU utilization', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: etcd', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 
'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local etcd_memory = grafana.graphPanel.new( - title='Etcd memory utilization', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: etcd', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// API an Kubeproxy section - -local api_latency_read_only_resource = grafana.graphPanel.new( - title='Read Only API request P99 latency - resource scoped', - datasource='$datasource1', - legend_alignAsTable=true, - format='s', - legend_max=true, - legend_avg=true, - legend_values=true, -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: resource', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - 
order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.resource.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local api_latency_read_only_namespace = grafana.graphPanel.new( - title='Read Only API request P99 latency - namespace scoped', - datasource='$datasource1', - legend_alignAsTable=true, - format='s', - legend_max=true, - legend_avg=true, - legend_values=true, -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: namespace', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local api_latency_read_only_cluster = grafana.graphPanel.new( - title='Read Only API request P99 latency - cluster scoped', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: cluster', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', 
- min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local api_latency_mutating = grafana.graphPanel.new( - title='Mutating API request P99 latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: mutatingAPICallsLatency', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local api_request_rate = grafana.graphPanel.new( - title='API request rate', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: APIRequestRate', - alias='{{labels.verb.keyword}} {{labels.resource.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.resource.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local service_sync_latency = grafana.graphPanel.new( - title='Service sync latency', - datasource='$datasource1', - 
legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid: $uuid AND metricName.keyword: kubeproxyP99ProgrammingLatency', - alias='Latency', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.instance.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid: $uuid AND metricName.keyword: serviceSyncLatency', - alias='Latency', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// Cluster Kubelet & CRI-O section -local kubelet_process_cpu = grafana.graphPanel.new( - title='Kubelet process CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: kubeletCPU', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.node.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local kubelet_process_memory = grafana.graphPanel.new( - title='Kubelet process RSS memory usage', - datasource='$datasource1', - 
legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: kubeletMemory', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.node.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local cri_o_process_cpu = grafana.graphPanel.new( - title='CRI-O process CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: crioCPU', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.node.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local cri_o_process_memory = grafana.graphPanel.new( - title='CRI-O RSS memory usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: crioMemory', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.node.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: 
'1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// Master Node section - -local container_cpu_master = grafana.graphPanel.new( - title='Container CPU usage $master', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $master AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local container_memory_master = grafana.graphPanel.new( - title='Container RSS memory $master', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $master AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: 
'1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local cpu_master = grafana.graphPanel.new( - title='CPU $master', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_min=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Masters" AND labels.instance.keyword: $master', - alias='{{labels.mode.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - settings: { - script: { - inline: '_value*100', - }, - }, - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.mode.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local memory_master = grafana.graphPanel.new( - title='Memory $master', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Masters" AND labels.instance.keyword: $master', - alias='Available', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - 
], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Masters" AND labels.instance.keyword: $master', - alias='Total', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Masters" AND labels.instance.keyword: $master', - alias='Utilization', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// Worker Node section - -local container_cpu_worker = grafana.graphPanel.new( - title='Container CPU usage $worker', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $worker AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local 
container_memory_worker = grafana.graphPanel.new( - title='Container RSS memory $worker', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $worker AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local cpu_worker = grafana.graphPanel.new( - title='CPU $worker', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_min=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Workers" AND labels.instance.keyword: $worker', - alias='{{labels.mode.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - settings: { - script: { - inline: '_value*100', - }, - }, - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.mode.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - 
-local memory_worker = grafana.graphPanel.new( - title='Memory $worker', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Workers" AND labels.instance.keyword: $worker', - alias='Available', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Workers" AND labels.instance.keyword: $worker', - alias='Total', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Workers" AND labels.instance.keyword: $worker', - alias='Utilization', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -// Infra Node section - -local container_cpu_infra = grafana.graphPanel.new( - title='Container CPU usage $infra', - datasource='$datasource1', - legend_alignAsTable=true, - legend_avg=true, - legend_max=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $infra AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} 
{{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local container_memory_infra = grafana.graphPanel.new( - title='Container RSS memory $infra', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $infra AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local cpu_infra = grafana.graphPanel.new( - title='CPU $infra', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_min=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - 
query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Infra" AND labels.instance.keyword: $infra', - alias='{{labels.mode.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - settings: { - script: { - inline: '_value*100', - }, - }, - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.mode.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local memory_infra = grafana.graphPanel.new( - title='Memory $infra', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Infra" AND labels.instance.keyword: $infra', - alias='Available', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Infra" AND labels.instance.keyword: $infra', - alias='Total', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Infra" AND labels.instance.keyword: $infra', - alias='Utilization', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], 
- bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// Aggregated worker node usage section -local agg_avg_cpu = grafana.graphPanel.new( - title='Avg CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_avg=true, - legend_max=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-AggregatedWorkers"', - alias='{{labels.mode.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - settings: { - script: { - inline: '_value*100', - }, - }, - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.mode.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local agg_avg_mem = grafana.graphPanel.new( - title='Avg Memory', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-AggregatedWorkers"', - alias='Available', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-AggregatedWorkers"', - alias='Total', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 
'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local agg_container_cpu = grafana.graphPanel.new( - title='Container CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "containerCPU-AggregatedWorkers" AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}}: {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local agg_container_mem = grafana.graphPanel.new( - title='Container memory RSS', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "containerMemory-AggregatedWorkers" AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}}: {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.container.keyword', - id: '3', - settings: 
{ - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -//Dashboard & Templates - -grafana.dashboard.new( - 'Kube-burner report v2', - description='', - editable='true', - time_from='now/y', - time_to='now', - timezone='utc', -) -.addTemplate( - grafana.template.datasource( - 'datasource1', - 'elasticsearch', - 'AWS Dev - ripsaw-kube-burner', - label='Datasource', - regex='/.*kube-burner.*/' - ) -) -.addTemplate( - grafana.template.new( - label='Platform', - name='platform', - current='All', - query='{"find": "terms", "field": "platform.keyword"}', - refresh=2, - multi=true, - includeAll=true, - datasource='$datasource1', - ) -) -.addTemplate( - grafana.template.new( - label='SDN type', - name='sdn', - current='All', - query='{"find": "terms", "field": "sdn_type.keyword"}', - refresh=2, - multi=true, - includeAll=true, - datasource='$datasource1', - ) -) -.addTemplate( - grafana.template.new( - label='Workload', - multi=true, - query='{"find": "terms", "field": "workload.keyword", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn"}', - refresh=1, - name='workload', - includeAll=false, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Worker count', - multi=true, - query='{"find": "terms", "field": "worker_nodes_count", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn AND workload.keyword: $workload"}', - refresh=1, - name='worker_count', - includeAll=true, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='UUID', - multi=false, - query='{"find": "terms", "field": "uuid.keyword", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn AND workload.keyword: $workload AND worker_nodes_count: $worker_count"}', - refresh=2, - name='uuid', - includeAll=false, - 
datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Master nodes', - multi=true, - query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: master AND uuid.keyword: $uuid"}', - refresh=2, - name='master', - includeAll=false, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Worker nodes', - multi=true, - query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: worker AND uuid.keyword: $uuid"}', - refresh=2, - name='worker', - includeAll=false, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Infra nodes', - multi=true, - query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: infra AND uuid.keyword: $uuid"}', - refresh=2, - name='infra', - includeAll=false, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Namespace', - multi=true, - query='{ "find" : "terms", "field": "labels.namespace.keyword", "query": "labels.namespace.keyword: /openshift-.*/ AND uuid.keyword: $uuid"}', - refresh=2, - name='namespace', - includeAll=true, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.custom( - label='Latency percentile', - name='latencyPercentile', - current='P99', - query='P99, P95, P50', - multi=false, - includeAll=false, - ) -) -.addPanels( - [ - worker_count { gridPos: { x: 0, y: 0, w: 4, h: 3 } }, - metric_count_panel { gridPos: { x: 4, y: 0, w: 12, h: 3 } }, - openshift_version_panel { gridPos: { x: 16, y: 0, w: 6, h: 3 } }, - etcd_version_panel { gridPos: { x: 22, y: 0, w: 2, h: 3 } }, - summary_panel_1 { gridPos: { x: 0, y: 3, h: 2, w: 24 } }, - summary_panel_2 { gridPos: { x: 0, y: 5, h: 2, w: 24 } }, - ], -) -.addPanel( - grafana.row.new(title='Cluster status', collapse=true).addPanels( - [ - masters_cpu { gridPos: { x: 
0, y: 8, w: 12, h: 9 } }, - masters_memory { gridPos: { x: 12, y: 8, w: 12, h: 9 } }, - node_status_summary { gridPos: { x: 0, y: 17, w: 12, h: 8 } }, - pod_status_summary { gridPos: { x: 12, y: 17, w: 12, h: 8 } }, - kube_api_cpu { gridPos: { x: 0, y: 25, w: 12, h: 9 } }, - kube_api_memory { gridPos: { x: 12, y: 25, w: 12, h: 9 } }, - active_controller_manager_cpu { gridPos: { x: 0, y: 34, w: 12, h: 9 } }, - active_controller_manager_memory { gridPos: { x: 12, y: 34, w: 12, h: 9 } }, - kube_scheduler_cpu { gridPos: { x: 0, y: 43, w: 12, h: 9 } }, - kube_scheduler_memory { gridPos: { x: 12, y: 43, w: 12, h: 9 } }, - hypershift_controlplane_cpu { gridPos: { x: 0, y: 52, w: 12, h: 9 } }, - hypershift_controlplane_memory { gridPos: { x: 12, y: 52, w: 12, h: 9 } }, - ] - ), { x: 0, y: 7, w: 24, h: 1 } -) -.addPanel( - // Panels below for uncollapsed row. - grafana.row.new(title='Pod latency stats', collapse=false), { x: 0, y: 8, w: 24, h: 1 } -) -.addPanels( - [ - average_pod_latency { gridPos: { x: 0, y: 9, w: 12, h: 8 } }, - pod_latencies_summary { gridPos: { x: 12, y: 9, w: 12, h: 8 } }, - pod_conditions_latency { gridPos: { x: 0, y: 17, w: 24, h: 10 } }, - setup_latency { gridPos: { x: 0, y: 27, w: 12, h: 9 } }, - scheduling_throughput { gridPos: { x: 12, y: 27, w: 12, h: 9 } }, - ] -) -.addPanel( - grafana.row.new(title='OVNKubernetes', collapse=true).addPanels( - [ - ovnkube_master_cpu { gridPos: { x: 0, y: 80, w: 12, h: 8 } }, - ovnkube_master_memory { gridPos: { x: 12, y: 80, w: 12, h: 8 } }, - ovnkube_controller_cpu { gridPos: { x: 0, y: 88, w: 12, h: 8 } }, - ovnkube_controller_memory { gridPos: { x: 12, y: 88, w: 12, h: 8 } }, - ] - ), { x: 0, y: 36, w: 24, h: 1 } -) -.addPanel( - grafana.row.new(title='etcd', collapse=false), { x: 0, y: 37, w: 24, h: 1 } -) -.addPanels( - [ - etcd_fsync_latency { gridPos: { x: 0, y: 38, w: 12, h: 9 } }, - etcd_commit_latency { gridPos: { x: 12, y: 38, w: 12, h: 9 } }, - etcd_leader_changes { gridPos: { x: 0, y: 47, w: 12, 
h: 9 } }, - etcd_peer_roundtrip_time { gridPos: { x: 12, y: 47, w: 12, h: 9 } }, - etcd_cpu { gridPos: { x: 0, y: 56, w: 12, h: 9 } }, - etcd_memory { gridPos: { x: 12, y: 56, w: 12, h: 9 } }, - ], -) -.addPanel( - grafana.row.new(title='API and Kubeproxy', collapse=false), { x: 0, y: 65, w: 24, h: 1 } -) -.addPanels( - [ - api_latency_read_only_resource { gridPos: { x: 0, y: 66, w: 12, h: 9 } }, - api_latency_read_only_namespace { gridPos: { x: 12, y: 66, w: 12, h: 9 } }, - api_latency_read_only_cluster { gridPos: { x: 0, y: 75, w: 12, h: 9 } }, - api_latency_mutating { gridPos: { x: 12, y: 75, w: 12, h: 9 } }, - api_request_rate { gridPos: { x: 0, y: 84, w: 12, h: 9 } }, - service_sync_latency { gridPos: { x: 12, y: 84, w: 12, h: 9 } }, - ], -) - -.addPanel( - grafana.row.new(title='Cluster Kubelet & CRI-O', collapse=false), { x: 0, y: 93, w: 24, h: 1 } -) -.addPanels( - [ - kubelet_process_cpu { gridPos: { x: 0, y: 94, w: 12, h: 8 } }, - kubelet_process_memory { gridPos: { x: 12, y: 94, w: 12, h: 8 } }, - cri_o_process_cpu { gridPos: { x: 0, y: 103, w: 12, h: 8 } }, - cri_o_process_memory { gridPos: { x: 12, y: 103, w: 12, h: 8 } }, - ], -) - -.addPanel( - grafana.row.new(title='Master: $master', collapse=true, repeat='$master').addPanels( - [ - container_cpu_master { gridPos: { x: 0, y: 112, w: 12, h: 9 } }, - container_memory_master { gridPos: { x: 12, y: 112, w: 12, h: 9 } }, - cpu_master { gridPos: { x: 0, y: 121, w: 12, h: 9 } }, - memory_master { gridPos: { x: 12, y: 121, w: 12, h: 9 } }, - ] - ), { x: 0, y: 111, w: 24, h: 1 } -) - -.addPanel( - grafana.row.new(title='Worker: $worker', collapse=true, repeat='$worker').addPanels( - [ - container_cpu_worker { gridPos: { x: 0, y: 112, w: 12, h: 9 } }, - container_memory_worker { gridPos: { x: 12, y: 112, w: 12, h: 9 } }, - cpu_worker { gridPos: { x: 0, y: 121, w: 12, h: 9 } }, - memory_worker { gridPos: { x: 12, y: 121, w: 12, h: 9 } }, - ] - ), { x: 0, y: 111, w: 24, h: 1 } -) - -.addPanel( - 
grafana.row.new(title='Infra: $infra', collapse=true, repeat='$infra').addPanels( - [ - container_cpu_infra { gridPos: { x: 0, y: 131, w: 12, h: 9 } }, - container_memory_infra { gridPos: { x: 12, y: 131, w: 12, h: 9 } }, - cpu_infra { gridPos: { x: 0, y: 140, w: 12, h: 9 } }, - memory_infra { gridPos: { x: 12, y: 140, w: 12, h: 9 } }, - ] - ), { x: 0, y: 130, w: 24, h: 1 } -) - -.addPanel( - grafana.row.new(title='Aggregated worker nodes usage (only in aggregated metrics profile)', collapse=true).addPanels( - [ - agg_avg_cpu { gridPos: { x: 0, y: 150, w: 12, h: 9 } }, - agg_avg_mem { gridPos: { x: 12, y: 150, w: 12, h: 9 } }, - agg_container_cpu { gridPos: { x: 0, y: 159, w: 12, h: 9 } }, - agg_container_mem { gridPos: { x: 12, y: 159, w: 12, h: 9 } }, - ] - ), { x: 0, y: 149, w: 24, h: 1 } -) diff --git a/templates/jsonnetfile.json b/templates/jsonnetfile.json new file mode 100644 index 0000000..2414c86 --- /dev/null +++ b/templates/jsonnetfile.json @@ -0,0 +1,15 @@ +{ + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-latest" + } + }, + "version": "main" + } + ], + "legacyImports": true +} diff --git a/templates/jsonnetfile.lock.json b/templates/jsonnetfile.lock.json new file mode 100644 index 0000000..d4b9d5c --- /dev/null +++ b/templates/jsonnetfile.lock.json @@ -0,0 +1,46 @@ +{ + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-latest" + } + }, + "version": "9087f21f41936c32967e4e3ae164627ba4055fed", + "sum": "sVzVlSLbxPkAurwO19YERigLMmRfVsViMcWC0gkTTqU=" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v10.0.0" + } + }, + "version": "9087f21f41936c32967e4e3ae164627ba4055fed", + "sum": "bWMuE3sTMnEFFwYmmwugyX+gjNsGea6NDwWxgzB19JQ=" + }, + { + "source": { + "git": { + "remote": 
"https://github.com/jsonnet-libs/docsonnet.git", + "subdir": "doc-util" + } + }, + "version": "fd8de9039b3c06da77d635a3a8289809a5bfb542", + "sum": "mFebrE9fhyAKW4zbnidcjVFupziN5LPA/Z7ii94uCzs=" + }, + { + "source": { + "git": { + "remote": "https://github.com/jsonnet-libs/xtd.git", + "subdir": "" + } + }, + "version": "0256a910ac71f0f842696d7bca0bf01ea77eb654", + "sum": "zBOpb1oTNvXdq9RF6yzTHill5r1YTJLBBoqyx4JYtAg=" + } + ], + "legacyImports": false +} From 79ec3621d0a6abfbbe63e0b8f31d40a004b9438d Mon Sep 17 00:00:00 2001 From: vishnuchalla Date: Thu, 5 Oct 2023 12:12:40 -0400 Subject: [PATCH 3/4] using go-jsonnet binaries --- Makefile | 9 ++++----- README.md | 6 +++--- assets/kube-burner-report-ocp-wrapper/queries.libsonnet | 8 ++++---- templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index ccc2cec..71dcc65 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,4 @@ +JSONNET := https://github.com/google/go-jsonnet/releases/download/v0.20.0/go-jsonnet_0.20.0_Linux_x86_64.tar.gz JB = https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64 BINDIR = bin TEMPLATESDIR = templates @@ -7,15 +8,13 @@ SYNCER_IMG_TAG ?= quay.io/cloud-bulldozer/dittybopper-syncer:latest PLATFORM = linux/amd64,linux/arm64,linux/ppc64le,linux/s390x ifeq ($(filter v2,$(MAKECMDGOALS)),v2) - # Set variables and instructions for v2 - TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*-v2.jsonnet) + # Set variables and instructions for v2 + TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*-v2.jsonnet) LIBRARY_PATH := $(TEMPLATESDIR)/vendor - JSONNET := https://github.com/cloud-bulldozer/utils/releases/download/v0.0.0/jsonnet-bin-v0.20.0-linux.tar.gz else # Get all templates at $(TEMPLATESDIR) TEMPLATES := $(filter-out %-v2.jsonnet, $(wildcard $(TEMPLATESDIR)/**/*.jsonnet)) LIBRARY_PATH := $(TEMPLATESDIR)/grafonnet-lib - JSONNET := 
https://github.com/google/jsonnet/releases/download/v0.17.0/jsonnet-bin-v0.17.0-linux.tar.gz endif # Replace $(TEMPLATESDIR)/*.jsonnet by $(OUTPUTDIR)/*.json @@ -64,4 +63,4 @@ build-syncer-image: build podman build --platform=${PLATFORM} -f Dockerfile --manifest=${SYNCER_IMG_TAG} . push-syncer-image: - podman manifest push ${SYNCER_IMG_TAG} ${SYNCER_IMG_TAG} \ No newline at end of file + podman manifest push ${SYNCER_IMG_TAG} ${SYNCER_IMG_TAG} diff --git a/README.md b/README.md index 476c6bc..674f57d 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ mkdir -p bin rendered tmp git clone --depth 1 https://github.com/grafana/grafonnet-lib.git templates/grafonnet-lib Cloning into 'templates/grafonnet-lib'... Downloading jsonnet binary -curl -s -L https://github.com/google/jsonnet/releases/download/v0.15.0/jsonnet-bin-v0.15.0-linux.tar.gz | tar xzf - -C bin +curl -s -L https://github.com/google/go-jsonnet/releases/download/v0.20.0/go-jsonnet_0.20.0_Linux_x86_64.tar.gz | tar xz -C bin Formating template templates/ocp-performance.jsonnet bin/jsonnetfmt templates/ocp-performance.jsonnet > tmp/ocp-performance.jsonnet mv tmp/ocp-performance.jsonnet templates/ocp-performance.jsonnet @@ -39,7 +39,7 @@ Similarly for V2, the dashboards that are built using latest grafonnet library, $ make v2 mkdir -p bin rendered Downloading jsonnet binary -curl -s -L https://github.com/cloud-bulldozer/utils/releases/download/v0.0.0/jsonnet-bin-v0.20.0-linux.tar.gz | tar xz -C bin +curl -s -L https://github.com/google/go-jsonnet/releases/download/v0.20.0/go-jsonnet_0.20.0_Linux_x86_64.tar.gz | tar xz -C bin Downloading jb binary curl -s -L https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64 -o bin/jb chmod +x bin/jb @@ -55,7 +55,7 @@ mkdir -p rendered/General/ bin/jsonnet -J ./templates/vendor templates/General/ocp-performance-v2.jsonnet > rendered/General/ocp-performance-v2.json Rendered the v2 dashboards with latest grafonnet library ``` -Rest all 
operations reamin same as before. +All other operations remain same as before. In order to clean up the environment execute `make clean`. diff --git a/assets/kube-burner-report-ocp-wrapper/queries.libsonnet b/assets/kube-burner-report-ocp-wrapper/queries.libsonnet index 17b0194..1a4e13e 100644 --- a/assets/kube-burner-report-ocp-wrapper/queries.libsonnet +++ b/assets/kube-burner-report-ocp-wrapper/queries.libsonnet @@ -1203,14 +1203,14 @@ local elasticsearch = g.query.elasticsearch; + elasticsearch.bucketAggs.Terms.withId("4") + elasticsearch.bucketAggs.Terms.withType('terms') + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') - + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + elasticsearch.bucketAggs.Terms.settings.withSize("10"), elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + elasticsearch.bucketAggs.Terms.withId("3") + elasticsearch.bucketAggs.Terms.withType('terms') + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') - + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + elasticsearch.bucketAggs.Terms.settings.withSize("10"), elasticsearch.bucketAggs.DateHistogram.withField("timestamp") @@ -1289,14 +1289,14 @@ local elasticsearch = g.query.elasticsearch; + elasticsearch.bucketAggs.Terms.withId("4") + elasticsearch.bucketAggs.Terms.withType('terms') + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') - + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + elasticsearch.bucketAggs.Terms.settings.withMinDocCount("1") + elasticsearch.bucketAggs.Terms.settings.withSize("10"), elasticsearch.bucketAggs.Terms.withField("labels.container.keyword") + 
elasticsearch.bucketAggs.Terms.withId("3") + elasticsearch.bucketAggs.Terms.withType('terms') + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') - + elasticsearch.bucketAggs.Terms.settings.withOrderBy('_term') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + elasticsearch.bucketAggs.Terms.settings.withSize("10"), elasticsearch.bucketAggs.DateHistogram.withField("timestamp") diff --git a/templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet b/templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet index 179df36..22f9f22 100644 --- a/templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet +++ b/templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet @@ -14,7 +14,7 @@ g.dashboard.new('Kube-burner Report - OCP wrapper') + g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d']) + g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d']) + g.dashboard.withRefresh('') -+ g.dashboard.withEditable(true) ++ g.dashboard.withEditable(false) + g.dashboard.graphTooltip.withSharedCrosshair() + g.dashboard.withVariables([ variables.Datasource, From a5155fd1ddaab9e821e39295fc88032c0729419a Mon Sep 17 00:00:00 2001 From: Vishnu Challa Date: Thu, 19 Oct 2023 19:28:25 -0400 Subject: [PATCH 4/4] adding commit hash as a tag --- assets/kube-burner-report-ocp-wrapper/queries.libsonnet | 5 ----- dittybopper/syncer/entrypoint.py | 9 +++++++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/assets/kube-burner-report-ocp-wrapper/queries.libsonnet b/assets/kube-burner-report-ocp-wrapper/queries.libsonnet index 1a4e13e..0d5e0aa 100644 --- a/assets/kube-burner-report-ocp-wrapper/queries.libsonnet +++ b/assets/kube-burner-report-ocp-wrapper/queries.libsonnet @@ -1276,8 +1276,6 @@ local elasticsearch = g.query.elasticsearch; + elasticsearch.withQuery(query) + elasticsearch.withTimeField('timestamp'), 
queries(): [ - self.base("Available", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryAvailable-Masters\" AND labels.instance.keyword: $master"), - self.base("Total", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryTotal-Masters\" AND labels.instance.keyword: $master"), self.base("Utilization", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryUtilization-Masters\" AND labels.instance.keyword: $master"), ], }, @@ -1362,8 +1360,6 @@ local elasticsearch = g.query.elasticsearch; + elasticsearch.withQuery(query) + elasticsearch.withTimeField('timestamp'), queries(): [ - self.base("available", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryAvailable-Workers\" AND labels.instance.keyword: \"$worker\""), - self.base("Total", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryTotal-Workers\" AND labels.instance.keyword: $worker"), self.base("Utilization", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryUtilization-Workers\" AND labels.instance.keyword: $worker"), ], }, @@ -1534,7 +1530,6 @@ local elasticsearch = g.query.elasticsearch; + elasticsearch.withTimeField('timestamp'), queries(): [ self.base("Available", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryAvailable-AggregatedWorkers\""), - self.base("Total", "uuid.keyword: $uuid AND metricName.keyword: \"nodeMemoryTotal-AggregatedWorkers\""), ], }, aggWorkerNodeContainerCpuUsage: { diff --git a/dittybopper/syncer/entrypoint.py b/dittybopper/syncer/entrypoint.py index e9d607a..8142ed3 100644 --- a/dittybopper/syncer/entrypoint.py +++ b/dittybopper/syncer/entrypoint.py @@ -13,9 +13,10 @@ class GrafanaOperations: """ This class is responsible for Grafana operations """ - def __init__(self, grafana_url: str, input_directory: str): + def __init__(self, grafana_url: str, input_directory: str, git_commit_hash: str): self.grafana_url = grafana_url self.input_directory = input_directory + self.git_commit_hash = git_commit_hash self.dashboards = defaultdict(list) 
self.folder_map = dict() self.logger = logging.getLogger(__name__) @@ -102,6 +103,10 @@ def create_dashboards(self): for folder_id, files in self.dashboards.items(): for json_file in set(files): dashboard_json = self.read_dashboard_json(json_file) + if "tags" in dashboard_json.keys(): + dashboard_json["tags"].append(self.git_commit_hash) + else: + dashboard_json["tags"] = self.git_commit_hash try: response = requests.post( f"{self.grafana_url}/api/dashboards/db", @@ -122,7 +127,7 @@ def create_dashboards(self): raise Exception(f"Error creating dashboard '{dashboard_json['title']}' in folder '{folder_id}'. Message: {e}") if __name__ == '__main__': - grafana_operations = GrafanaOperations(os.environ.get("GRAFANA_URL"), os.environ.get("INPUT_DIR")) + grafana_operations = GrafanaOperations(os.environ.get("GRAFANA_URL"), os.environ.get("INPUT_DIR"), os.environ.get("GIT_COMMIT_HASH")) grafana_operations.fetch_all_dashboards() grafana_operations.create_dashboards() while True: