Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kube burner Report - OCP Wrapper Dashboard using latest grafonnet library. #80

Merged
merged 6 commits into from
Oct 24, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 29 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,48 +1,66 @@
JSONNET = https://github.com/google/jsonnet/releases/download/v0.17.0/jsonnet-bin-v0.17.0-linux.tar.gz
JSONNET := https://github.com/google/go-jsonnet/releases/download/v0.20.0/go-jsonnet_0.20.0_Linux_x86_64.tar.gz
JB = https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64
BINDIR = bin
TEMPLATESDIR = templates
OUTPUTDIR = rendered
ALLDIRS = $(BINDIR) $(OUTPUTDIR)
SYNCER_IMG_TAG ?= quay.io/cloud-bulldozer/dittybopper-syncer:latest
PLATFORM = linux/amd64,linux/arm64,linux/ppc64le,linux/s390x

# Get all templates at $(TEMPLATESDIR)
TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*.jsonnet)
ifeq ($(filter v2,$(MAKECMDGOALS)),v2)
# Set variables and instructions for v2
TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*-v2.jsonnet)
LIBRARY_PATH := $(TEMPLATESDIR)/vendor
else
# Get all templates at $(TEMPLATESDIR)
TEMPLATES := $(filter-out %-v2.jsonnet, $(wildcard $(TEMPLATESDIR)/**/*.jsonnet))
LIBRARY_PATH := $(TEMPLATESDIR)/grafonnet-lib
endif

# Replace $(TEMPLATESDIR)/*.jsonnet by $(OUTPUTDIR)/*.json
outputs := $(patsubst $(TEMPLATESDIR)/%.jsonnet, $(OUTPUTDIR)/%.json, $(TEMPLATES))

all: deps format build

deps: $(ALLDIRS) $(TEMPLATESDIR)/grafonnet-lib $(BINDIR)/jsonnet
deps: $(ALLDIRS) $(BINDIR)/jsonnet $(LIBRARY_PATH)

$(ALLDIRS):
mkdir -p $(ALLDIRS)

format: deps
$(BINDIR)/jsonnetfmt -i $(TEMPLATES)

build: deps $(TEMPLATESDIR)/grafonnet-lib $(outputs)
build: deps $(LIBRARY_PATH) $(outputs)

clean:
@echo "Cleaning up"
rm -rf $(ALLDIRS) $(TEMPLATESDIR)/grafonnet-lib

$(TEMPLATESDIR)/grafonnet-lib:
git clone --depth 1 https://github.com/grafana/grafonnet-lib.git $(TEMPLATESDIR)/grafonnet-lib
rm -rf $(ALLDIRS) $(TEMPLATESDIR)/vendor $(TEMPLATESDIR)/grafonnet-lib

# Download the jsonnet release tarball and the jsonnet-bundler (jb) binary
# into $(BINDIR).
#
# - $(BINDIR) is an order-only prerequisite: the directory must exist before
#   anything is written into it, but its timestamp never triggers a re-download.
# - jb is fetched first so that this rule's target, $(BINDIR)/jsonnet, only
#   appears once every download has succeeded. If the jb download fails, the
#   target is still missing and the next `make` run retries the whole rule.
# - curl -f turns HTTP errors into a failing recipe instead of silently saving
#   an error page as an "executable"; -S keeps error messages visible under -s.
$(BINDIR)/jsonnet: | $(BINDIR)
	@echo "Downloading jb binary"
	curl -fsSL $(JB) -o $(BINDIR)/jb
	chmod +x $(BINDIR)/jb
	@echo "Downloading jsonnet binary"
	curl -fsSL $(JSONNET) | tar xz -C $(BINDIR)

$(TEMPLATESDIR)/grafonnet-lib:
git clone --depth 1 https://github.com/grafana/grafonnet-lib.git $(TEMPLATESDIR)/grafonnet-lib

# Install the jsonnet libraries for the v2 dashboards into $(TEMPLATESDIR)/vendor
# by running `jb install` from inside $(TEMPLATESDIR).
#
# - jb is downloaded by the $(BINDIR)/jsonnet rule, so that rule is listed as an
#   order-only prerequisite: it must have run before this recipe (also under
#   `make -j` or when this target is requested directly), but a newer binary
#   does not force the vendor directory to be re-installed.
# - jb is referenced through $(CURDIR) so the path stays valid regardless of how
#   deep $(TEMPLATESDIR) is nested.
# - No trailing `cd ..`: each recipe line runs in its own shell, so the working
#   directory change never leaks into other commands.
$(TEMPLATESDIR)/vendor: | $(BINDIR)/jsonnet
	@echo "Downloading vendor files"
	cd $(TEMPLATESDIR) && "$(CURDIR)/$(BINDIR)/jb" install

# Build each template and output to $(OUTPUTDIR)
$(OUTPUTDIR)/%.json: $(TEMPLATESDIR)/%.jsonnet
@echo "Building template $<"
mkdir -p $(dir $@)
$(BINDIR)/jsonnet $< > $@
$(BINDIR)/jsonnet -J ./$(LIBRARY_PATH) $< > $@

# Render the v2 dashboards. Naming `v2` on the command line is what switches
# TEMPLATES and LIBRARY_PATH to their v2 values (see the MAKECMDGOALS check
# near the top of this file); the goal itself just runs `all` and reports.
# Declared phony so a stray file or directory called `v2` can never make this
# goal look up to date.
.PHONY: v2
v2: all
	@echo "Rendered the v2 dashboards with latest grafonnet library"

build-syncer-image: build
podman build --platform=${PLATFORM} -f Dockerfile --manifest=${SYNCER_IMG_TAG} .

push-syncer-image:
podman manifest push ${SYNCER_IMG_TAG} ${SYNCER_IMG_TAG}
podman manifest push ${SYNCER_IMG_TAG} ${SYNCER_IMG_TAG}
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -25,7 +25,7 @@ mkdir -p bin rendered tmp
git clone --depth 1 https://github.com/grafana/grafonnet-lib.git templates/grafonnet-lib
Cloning into 'templates/grafonnet-lib'...
Downloading jsonnet binary
curl -s -L https://github.com/google/jsonnet/releases/download/v0.15.0/jsonnet-bin-v0.15.0-linux.tar.gz | tar xzf - -C bin
curl -s -L https://github.com/google/go-jsonnet/releases/download/v0.20.0/go-jsonnet_0.20.0_Linux_x86_64.tar.gz | tar xz -C bin
Formating template templates/ocp-performance.jsonnet
bin/jsonnetfmt templates/ocp-performance.jsonnet > tmp/ocp-performance.jsonnet
mv tmp/ocp-performance.jsonnet templates/ocp-performance.jsonnet
@@ -34,6 +34,28 @@ bin/jsonnet templates/ocp-performance.jsonnet > rendered/ocp-performance.json
$ ls rendered
ocp-ingress-controller.json ocp-performance.json
```
Similarly, to build the V2 dashboards, which use the latest grafonnet library, run:
```
$ make v2
mkdir -p bin rendered
Downloading jsonnet binary
curl -s -L https://github.com/google/go-jsonnet/releases/download/v0.20.0/go-jsonnet_0.20.0_Linux_x86_64.tar.gz | tar xz -C bin
Downloading jb binary
curl -s -L https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64 -o bin/jb
chmod +x bin/jb
Downloading vendor files
cd templates && ../bin/jb install && cd ../
GET https://github.com/grafana/grafonnet/archive/f40876da40d787e9c288de0b547ac85597c781d9.tar.gz 200
GET https://github.com/grafana/grafonnet/archive/f40876da40d787e9c288de0b547ac85597c781d9.tar.gz 200
GET https://github.com/jsonnet-libs/docsonnet/archive/cc9df63eaca56f39e8e4e1ce192141333257b08d.tar.gz 200
GET https://github.com/jsonnet-libs/xtd/archive/0256a910ac71f0f842696d7bca0bf01ea77eb654.tar.gz 200
bin/jsonnetfmt -i templates/General/ocp-performance-v2.jsonnet
Building template templates/General/ocp-performance-v2.jsonnet
mkdir -p rendered/General/
bin/jsonnet -J ./templates/vendor templates/General/ocp-performance-v2.jsonnet > rendered/General/ocp-performance-v2.json
Rendered the v2 dashboards with latest grafonnet library
```
All other operations remain the same as before.

In order to clean up the environment execute `make clean`.

@@ -73,4 +95,4 @@ In addition, make sure to lint your modifications to jsonnet files if you don't

The dashboards from this repository have been tested with the following versions:

- Grafana 7.X
- Grafana 7.X
836 changes: 836 additions & 0 deletions assets/kube-burner-report-ocp-wrapper/panels.libsonnet

Large diffs are not rendered by default.

1,584 changes: 1,584 additions & 0 deletions assets/kube-burner-report-ocp-wrapper/queries.libsonnet

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions assets/kube-burner-report-ocp-wrapper/variables.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Template variables for the kube-burner report (OCP wrapper) dashboard.
//
// Every query variable below sends a JSON "find: terms" lookup to the data
// source selected through the Datasource variable. The variables form a chain:
// each lookup filters on the values chosen for the variables defined before it
// (platform -> sdn -> workload -> nodes -> uuid -> master/worker/infra).
//
// NOTE(review): withRefresh(1) / withRefresh(2) presumably map to Grafana's
// "on dashboard load" / "on time range change" refresh modes -- confirm against
// the grafonnet documentation.
local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local var = g.dashboard.variable;

{
// Data source picker: elasticsearch data sources whose name matches the regex.
Datasource:
var.datasource.new('Datasource', 'elasticsearch')
+ var.datasource.withRegex('/.*kube-burner.*/')
+ var.query.generalOptions.withLabel('Datasource'),

// Platform values taken from metadata.platform.keyword; multi-select.
platform:
var.query.new('platform', "{\"find\": \"terms\", \"field\": \"metadata.platform.keyword\"}")
+ var.query.withDatasourceFromVariable(self.Datasource)
+ var.query.withRefresh(2)
+ var.query.selectionOptions.withMulti()
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('Platform'),

// SDN types available for the selected platform(s); multi-select.
sdn:
var.query.new('sdn', "{\"find\": \"terms\", \"field\": \"metadata.sdnType.keyword\", \"query\": \"metadata.platform.keyword: $platform\"}")
+ var.query.withDatasourceFromVariable(self.Datasource)
+ var.query.withRefresh(1)
+ var.query.selectionOptions.withMulti()
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('SDN type'),

// Job (workload) names for the selected platform and SDN type; single-select.
workload:
var.query.new('workload', "{\"find\": \"terms\", \"field\": \"jobConfig.name.keyword\", \"query\": \"metadata.platform.keyword: $platform AND metadata.sdnType.keyword: $sdn\"}")
+ var.query.withDatasourceFromVariable(self.Datasource)
+ var.query.withRefresh(1)
+ var.query.selectionOptions.withMulti(false)
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('Workload'),

// Total node counts recorded for the selected platform, SDN type and workload.
nodes:
var.query.new('nodes', "{\"find\": \"terms\", \"field\": \"metadata.totalNodes\", \"query\": \"metadata.platform.keyword: $platform AND metadata.sdnType.keyword: $sdn AND jobConfig.name.keyword: $workload\"}")
+ var.query.withDatasourceFromVariable(self.Datasource)
+ var.query.withRefresh(1)
+ var.query.selectionOptions.withMulti(false)
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('nodes'),

// Run UUIDs matching all of the selections above; single-select.
uuid:
var.query.new('uuid', "{\"find\": \"terms\", \"field\": \"uuid.keyword\", \"query\": \"metadata.platform.keyword: $platform AND metadata.sdnType.keyword: $sdn AND jobConfig.name.keyword: $workload AND metadata.totalNodes: $nodes\"}")
+ var.query.withDatasourceFromVariable(self.Datasource)
+ var.query.withRefresh(2)
+ var.query.selectionOptions.withMulti(false)
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('UUID'),

// Node names carrying the "master" role in the nodeRoles metric of the selected run.
master:
var.query.new('master', "{ \"find\" : \"terms\", \"field\": \"labels.node.keyword\", \"query\": \"metricName.keyword: nodeRoles AND labels.role.keyword: master AND uuid.keyword: $uuid\"}")
+ var.query.withDatasourceFromVariable(self.Datasource)
+ var.query.withRefresh(2)
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('Master nodes'),

// Node names carrying the "worker" role in the nodeRoles metric of the selected run.
worker:
var.query.new('worker', "{ \"find\" : \"terms\", \"field\": \"labels.node.keyword\", \"query\": \"metricName.keyword: nodeRoles AND labels.role.keyword: worker AND uuid.keyword: $uuid\"}")
+ var.query.withDatasourceFromVariable(self.Datasource)
+ var.query.withRefresh(2)
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('Worker nodes'),

// Node names carrying the "infra" role in the nodeRoles metric of the selected run.
infra:
var.query.new('infra', "{ \"find\" : \"terms\", \"field\": \"labels.node.keyword\", \"query\": \"metricName.keyword: nodeRoles AND labels.role.keyword: infra AND uuid.keyword: $uuid\"}")
+ var.query.withDatasourceFromVariable(self.Datasource)
+ var.query.withRefresh(2)
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('Infra nodes'),

// Fixed list of percentile choices; not backed by a data source query.
latencyPercentile:
var.custom.new('latencyPercentile', ['P99', 'P95', 'P50'],)
+ var.custom.generalOptions.withLabel('Latency percentile'),
}
9 changes: 7 additions & 2 deletions dittybopper/syncer/entrypoint.py
Original file line number Diff line number Diff line change
@@ -13,9 +13,10 @@ class GrafanaOperations:
"""
This class is responsible for Grafana operations
"""
def __init__(self, grafana_url: str, input_directory: str):
def __init__(self, grafana_url: str, input_directory: str, git_commit_hash: str):
self.grafana_url = grafana_url
self.input_directory = input_directory
self.git_commit_hash = git_commit_hash
self.dashboards = defaultdict(list)
self.folder_map = dict()
self.logger = logging.getLogger(__name__)
@@ -102,6 +103,10 @@ def create_dashboards(self):
for folder_id, files in self.dashboards.items():
for json_file in set(files):
dashboard_json = self.read_dashboard_json(json_file)
# Tag the dashboard with the commit it was rendered from.
# "tags" is handled as a list (existing tags are appended to), so a missing
# key must start out as an empty list; assigning the bare hash string would
# turn the field into a string instead of a one-element list.
# GIT_COMMIT_HASH may be unset, in which case the hash is None and the
# dashboard is left untagged rather than gaining a null tag.
if self.git_commit_hash:
    dashboard_json.setdefault("tags", []).append(self.git_commit_hash)
try:
response = requests.post(
f"{self.grafana_url}/api/dashboards/db",
@@ -122,7 +127,7 @@ def create_dashboards(self):
raise Exception(f"Error creating dashboard '{dashboard_json['title']}' in folder '{folder_id}'. Message: {e}")

if __name__ == '__main__':
grafana_operations = GrafanaOperations(os.environ.get("GRAFANA_URL"), os.environ.get("INPUT_DIR"))
grafana_operations = GrafanaOperations(os.environ.get("GRAFANA_URL"), os.environ.get("INPUT_DIR"), os.environ.get("GIT_COMMIT_HASH"))
grafana_operations.fetch_all_dashboards()
grafana_operations.create_dashboards()
while True:
149 changes: 149 additions & 0 deletions templates/CPT/kube-burner-report-ocp-wrapper-v2.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
local panels = import '../../assets/kube-burner-report-ocp-wrapper/panels.libsonnet';
local queries = import '../../assets/kube-burner-report-ocp-wrapper/queries.libsonnet';
local variables = import '../../assets/kube-burner-report-ocp-wrapper/variables.libsonnet';
local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';

g.dashboard.new('Kube-burner Report - OCP wrapper')
+ g.dashboard.withDescription(|||
Dashboard for kube-burner OCP wrapper
|||)
+ g.dashboard.withTags('kube-burner')
+ g.dashboard.time.withFrom('now-12h')
+ g.dashboard.time.withTo('now')
+ g.dashboard.withTimezone('utc')
+ g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'])
+ g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'])
+ g.dashboard.withRefresh('')
+ g.dashboard.withEditable(false)
+ g.dashboard.graphTooltip.withSharedCrosshair()
+ g.dashboard.withVariables([
variables.Datasource,
variables.platform,
variables.sdn,
variables.workload,
variables.nodes,
variables.uuid,
variables.master,
variables.worker,
variables.infra,
variables.latencyPercentile,
])
+ g.dashboard.withPanels([
panels.stat.withLastNotNullCalcs('Node count', 'none', queries.nodeCount.query(), { x: 0, y: 0, w: 4, h: 3 }),
panels.stat.withLastNotNullCalcs('', '', queries.aggregatesCount.queries(), { x: 4, y: 0, w: 12, h: 3 }),
panels.stat.withFieldSummary('OpenShift version', '', '/^metadata\\.ocpVersion$/', queries.openshiftVersion.query(), { x: 16, y: 0, w: 6, h: 3 }),
panels.stat.withFieldSummary('Etcd version', '', '/^labels\\.cluster_version$/', queries.openshiftVersion.query(), { x: 22, y: 0, w: 2, h: 3 }),
panels.table.withJobSummary('', '', queries.jobSummary.query(), { x: 0, y: 3, w: 24, h: 3 }),
panels.table.withClusterMetadata('', '', queries.clusterMetadata.query(), { x: 0, y: 6, w: 24, h: 3 }),
panels.table.withAlerts('Alerts', '', queries.alerts.query(), { x: 0, y: 9, w: 24, h: 4 }),
g.panel.row.new('Cluster status')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withPanels([
panels.timeSeries.withMeanMax('Masters CPU utilization', 'percent', queries.mastersCPUUtilization.queries(), { x: 0, y: 14, w: 12, h: 9 }, -1),
panels.timeSeries.sortByMin('Masters Memory utilization', 'bytes', queries.mastersMemoryUtilization.queries(), { x: 12, y: 14, w: 12, h: 9 }, -1),
panels.timeSeries.sortMaxWithRightLegend('Node status summary', 'short', queries.nodeStatusSummary.query(), { x: 0, y: 23, w: 12, h: 8 }, null),
panels.timeSeries.maxWithBottomLegend('Pod status summary', 'none', queries.podStatusSummary.query(), { x: 12, y: 23, w: 12, h: 8 }, null),
panels.timeSeries.kupeApiCustomOverrides('Kube-apiserver usage', 'percent', queries.kubeApiServerUsage.queries(), { x: 0, y: 31, w: 12, h: 9 }, null),
panels.timeSeries.kupeApiAverageCustomOverrides('Average kube-apiserver usage', 'percent', queries.averageKubeApiServerUsage.queries(), { x: 12, y: 31, w: 12, h: 9 }, null),
panels.timeSeries.activeKubeControllerManagerOverrides('Active Kube-controller-manager usage', 'percent', queries.activeKubeControllerManagerUsage.queries(), { x: 0, y: 40, w: 12, h: 9 }, null),
panels.timeSeries.kubeSchedulerUsageOverrides('Kube-scheduler usage', 'percent', queries.kubeSchedulerUsage.queries(), { x: 12, y: 40, w: 12, h: 9 }, null),
]),
g.panel.row.new('Pod latency stats')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withPanels([
panels.timeSeries.sortByMeanCommon('Average pod latency', 'ms', queries.averagePodLatency.query(), { x: 0, y: 13, w: 12, h: 8 }, -1),
panels.stat.withMeanThresholds('Pod latencies summary $latencyPercentile', 'ms', queries.podLatenciesSummary.query(), { x: 12, y: 15, w: 12, h: 8 }),
panels.table.withLatencyTableOverrides('Pod conditions latency', 'ms', queries.podConditionsLatency.query(), { x: 0, y: 23, w: 24, h: 10 }),
panels.timeSeries.sortByMax('Top 10 Container runtime network setup latency', 'µs', queries.top10ContainerRuntimeNetworkSetupLatency.query(), { x: 0, y: 33, w: 12, h: 9 }, -1),
panels.timeSeries.withMeanMax('Scheduling throughput', 'reqps', queries.schedulingThroughput.query(), { x: 12, y: 33, w: 12, h: 9 }, -1),
]),
g.panel.row.new('OVNKubernetes')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withPanels([
panels.timeSeries.sortByMean('ovnkube-master pods CPU usage', 'percent', queries.ovnKubeMasterPodStats.queries('containerCPU'), { x: 0, y: 16, w: 12, h: 9 }, null),
panels.timeSeries.sortByMax('ovnkube-master pods Memory usage', 'bytes', queries.ovnKubeMasterPodStats.queries('containerMemory'), { x: 12, y: 16, w: 12, h: 9 }, null),
panels.timeSeries.sortByMean('ovnkube-master CPU usage', 'percent', queries.ovnKubeMasterStats.queries('containerCPU'), { x: 0, y: 25, w: 12, h: 8 }, null),
panels.timeSeries.sortByMaxCommon('ovnkube-master Memory Usage', 'bytes', queries.ovnKubeMasterStats.queries('containerMemory'), { x: 12, y: 25, w: 12, h: 8 }, null),
panels.timeSeries.sortByMean('ovnkube-node pods CPU Usage', 'percent', queries.ovnKubeNodePodStats.queries('containerCPU'), { x: 0, y: 33, w: 12, h: 8 }, null),
panels.timeSeries.sortByMean('ovnkube-node pods Memory Usage', 'bytes', queries.ovnKubeNodePodStats.queries('containerMemory'), { x: 12, y: 33, w: 12, h: 8 }, null),
panels.timeSeries.sortByMax('ovn-controller CPU Usage', 'percent', queries.ovnControllerStats.query('containerCPU'), { x: 0, y: 41, w: 12, h: 8 }, null),
panels.timeSeries.sortByMax('ovn-controller Memory Usage', 'bytes', queries.ovnControllerStats.query('containerMemory'), { x: 12, y: 41, w: 12, h: 8 }, null),
panels.timeSeries.withMeanMax('Aggregated OVNKube-master containers CPU', 'percent', queries.aggregatedOVNKubeMasterStats.queries('containerCPU'), { x: 0, y: 49, w: 12, h: 14 }, null),
panels.timeSeries.withMeanMax('Aggregated OVNKube-master containers memory', 'bytes', queries.aggregatedOVNKubeMasterStats.queries('containerMemory'), { x: 12, y: 49, w: 12, h: 14 }, null),
panels.timeSeries.withMeanMax('Aggregated OVNKube-node containers CPU', 'percent', queries.aggregatedOVNKubeNodeStats.query('containerCPU'), { x: 0, y: 63, w: 12, h: 14 }, null),
panels.timeSeries.sortByMeanCommon('Aggregated OVNKube-node containers Memory', 'bytes', queries.aggregatedOVNKubeNodeStats.query('containerMemory'), { x: 12, y: 63, w: 12, h: 14 }, null),
]),
g.panel.row.new('etcd')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withPanels([
panels.timeSeries.etcd99thDiskWalLatencyOverrides('etcd 99th disk WAL fsync latency', 's', queries.etcd99thLatencies.query('99thEtcdDiskWalFsyncDurationSeconds'), { x: 0, y: 17, w: 12, h: 9 }, null),
panels.timeSeries.etcd99thCommitLatencyOverrides('etcd 99th disk backend commit latency', 's', queries.etcd99thLatencies.query('99thEtcdDiskBackendCommitDurationSeconds'), { x: 12, y: 17, w: 12, h: 9 }, null),
panels.timeSeries.base('Etcd leader changes', 'none', queries.etcdLeaderChanges.query(), { x: 0, y: 26, w: 12, h: 9 }, null),
panels.timeSeries.etcd99thNetworkPeerRTOverrides('Etcd 99th network peer roundtrip time', 's', queries.etcd99thNetworkPeerRT.query(), { x: 12, y: 26, w: 12, h: 9 }, null),
panels.timeSeries.etcdResouceUtilizationOverrides('Etcd resource utilization', 'percent', queries.etcdResourceUtilization.queries(), { x: 0, y: 35, w: 12, h: 9 }, null),
]),
g.panel.row.new('API and Kubeproxy')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withPanels([
panels.timeSeries.readOnlyAPIRequestp99ResourceOverrides('Read Only API request P99 latency - resource scoped', 's', queries.readOnlyAPILatencyResource.query(), { x: 0, y: 18, w: 12, h: 8 }, -1),
panels.timeSeries.readOnlyAPIRequestp99NamespaceOverrides('Read Only API request P99 latency - namespace scoped', 's', queries.readOnlyAPILatencyNamespace.query(), { x: 12, y: 18, w: 12, h: 8 }, -1),
panels.timeSeries.readOnlyAPIRequestp99ClusterOverrides('Read Only API request P99 latency - cluster scoped', 's', queries.readOnlyAPILatencyCluster.query(), { x: 0, y: 26, w: 12, h: 8 }, -1),
panels.timeSeries.readOnlyAPIRequestp99MutatingOverrides('Mutating API request P99 latency', 's', queries.readOnlyAPILatencyMutating.query(), { x: 12, y: 26, w: 12, h: 8 }, -1),
panels.timeSeries.base('Service sync latency', 's', queries.serviceSyncLatency.query(), { x: 0, y: 34, w: 12, h: 10 }, null),
panels.timeSeries.sortByMax('API request rate', 'reqps', queries.apiRequestRate.query(), { x: 12, y: 34, w: 12, h: 10 }, -1),
]),
g.panel.row.new('Cluster Kubelet & CRI-O')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withPanels([
panels.timeSeries.meanWithRightLegendCommons('Top 5 Kubelet process by CPU usage', 'percent', queries.top5KubeletProcessByCpuUsage.queries(), { x: 0, y: 19, w: 12, h: 8 }, null),
panels.timeSeries.meanWithRightLegendCommons('Top 5 CRI-O process by CPU usage', 'percent', queries.top5CrioProcessByCpuUsage.queries(), { x: 12, y: 19, w: 12, h: 8 }, null),
panels.timeSeries.maxMeanWithRightLegend('Top 5 Kubelet RSS by memory usage', 'bytes', queries.top5KubeletRSSByMemoryUsage.queries(), { x: 0, y: 27, w: 12, h: 8 }, -1),
panels.timeSeries.maxMeanWithRightLegend('Top 5 CRI-O RSS by memory usage', 'bytes', queries.top5CrioRSSByMemoryUsage.queries(), { x: 12, y: 27, w: 12, h: 8 }, null),
]),
g.panel.row.new('Master: $master')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 0, h: 8 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withRepeat('master')
+ g.panel.row.withPanels([
panels.timeSeries.sortByMax('Container CPU usage $master', 'percent', queries.mastersContainerStats.query('containerCPU'), { x: 0, y: 20, w: 12, h: 9 }, null),
panels.timeSeries.maxWithBottomLegend('Container RSS memory $master', 'bytes', queries.mastersContainerStats.query('containerMemory'), { x: 12, y: 20, w: 12, h: 9 }, null),
panels.timeSeries.withCommonAggregationsRightPlacement('CPU $master', 'percent', queries.masterCPU.query(), { x: 0, y: 29, w: 12, h: 9 }, null),
panels.timeSeries.allWithRightLegend('Memory $master', 'bytes', queries.masterMemory.queries(), { x: 12, y: 29, w: 12, h: 9 }, null),
]),
g.panel.row.new('Worker: $worker')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 0, h: 8 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withRepeat('worker')
+ g.panel.row.withPanels([
panels.timeSeries.sortByMax('Container CPU usage $worker', 'percent', queries.workersContainerStats.query('containerCPU'), { x: 0, y: 21, w: 12, h: 9 }, null),
panels.timeSeries.withMeanMax('Container RSS memory $worker', 'bytes', queries.workersContainerStats.query('containerMemory'), { x: 12, y: 21, w: 12, h: 9 }, null),
panels.timeSeries.workerCPUCustomOverrides('CPU $worker', 'percent', queries.workerCPU.query(), { x: 0, y: 30, w: 12, h: 8 }, null),
panels.timeSeries.maxWithRightLegend('Memory $worker', 'bytes', queries.workerMemory.queries(), { x: 12, y: 30, w: 12, h: 8 }, null),
]),
g.panel.row.new('Infra: $infra')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 0, h: 8 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withRepeat('infra')
+ g.panel.row.withPanels([
panels.timeSeries.sortByMean('Container CPU usage $infra', 'percent', queries.infraContainerStats.queries('containerCPU'), { x: 0, y: 31, w: 12, h: 9 }, null),
panels.timeSeries.sortByMax('Container RSS memory $infra', 'bytes', queries.infraContainerStats.queries('containerMemory'), { x: 12, y: 31, w: 12, h: 9 }, null),
panels.timeSeries.meanWithRightLegend('CPU $infra', 'percent', queries.infraCPU.query(), { x: 0, y: 31, w: 12, h: 9 }, null),
panels.timeSeries.minMaxWithRightLegend('Memory $infra', 'bytes', queries.infraMemory.queries(), { x: 12, y: 31, w: 12, h: 9 }, null),
]),
g.panel.row.new('Aggregated worker nodes usage (only in aggregated metrics profile)')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 0, h: 8 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withRepeat('_infra_node')
+ g.panel.row.withPanels([
panels.timeSeries.meanWithRightLegend('Avg CPU usage', 'percent', queries.aggWorkerNodeCpuUsage.query(), { x: 0, y: 23, w: 12, h: 9 }, -1),
panels.timeSeries.maxWithRightLegend('Avg Memory', 'bytes', queries.aggWorkerNodeMemory.queries(), { x: 12, y: 23, w: 12, h: 9 }, null),
panels.timeSeries.sortByMax('container CPU usage', 'percent', queries.aggWorkerNodeContainerCpuUsage.query(), { x: 0, y: 32, w: 12, h: 9 }, -1),
panels.timeSeries.sortByMax('Container memory RSS', 'bytes', queries.aggWorkerNodeContainerMemoryUsage.query(), { x: 12, y: 32, w: 12, h: 9 }, null),
]),
])
15 changes: 15 additions & 0 deletions templates/jsonnetfile.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-latest"
}
},
"version": "main"
}
],
"legacyImports": true
}
46 changes: 46 additions & 0 deletions templates/jsonnetfile.lock.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-latest"
}
},
"version": "9087f21f41936c32967e4e3ae164627ba4055fed",
"sum": "sVzVlSLbxPkAurwO19YERigLMmRfVsViMcWC0gkTTqU="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v10.0.0"
}
},
"version": "9087f21f41936c32967e4e3ae164627ba4055fed",
"sum": "bWMuE3sTMnEFFwYmmwugyX+gjNsGea6NDwWxgzB19JQ="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/docsonnet.git",
"subdir": "doc-util"
}
},
"version": "fd8de9039b3c06da77d635a3a8289809a5bfb542",
"sum": "mFebrE9fhyAKW4zbnidcjVFupziN5LPA/Z7ii94uCzs="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/xtd.git",
"subdir": ""
}
},
"version": "0256a910ac71f0f842696d7bca0bf01ea77eb654",
"sum": "zBOpb1oTNvXdq9RF6yzTHill5r1YTJLBBoqyx4JYtAg="
}
],
"legacyImports": false
}