@@ -23,7 +23,7 @@ local genericGraphLegendPanel(title, datasource, format) = grafana.graphPanel.ne
 
 local hostedControlPlaneCPU = genericGraphLegendPanel('Hosted Control Plane CPU', 'Cluster Prometheus', 'percent').addTarget(
   prometheus.target(
-    'topk(10,irate(container_cpu_usage_seconds_total{namespace=~"$namespace",container!="POD",name!=""}[1m])*100)',
+    'topk(10,irate(container_cpu_usage_seconds_total{namespace=~"$namespace",container!="POD",name!=""}[2m])*100)',
     legendFormat='{{pod}}/{{container}}',
   )
 );
@@ -179,7 +179,7 @@ local top10ContMemHosted = genericGraphLegendPanel('Top 10 Hosted Clusters conta
 
 local top10ContCPUHosted = genericGraphLegendPanel('Top 10 Hosted Clusters container CPU', 'Cluster Prometheus', 'percent').addTarget(
   prometheus.target(
-    'topk(10,irate(container_cpu_usage_seconds_total{namespace=~"^ocm-.*",container!="POD",name!=""}[1m])*100)',
+    'topk(10,irate(container_cpu_usage_seconds_total{namespace=~"^ocm-.*",container!="POD",name!=""}[2m])*100)',
     legendFormat='{{ namespace }} - {{ name }}',
   )
 );
@@ -193,7 +193,7 @@ local top10ContMemManagement = genericGraphLegendPanel('Top 10 Management Cluste
 
 local top10ContCPUManagement = genericGraphLegendPanel('Top 10 Management Cluster container CPU', 'Cluster Prometheus', 'percent').addTarget(
   prometheus.target(
-    'topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[1m])*100)',
+    'topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[2m])*100)',
     legendFormat='{{ namespace }} - {{ name }}',
   )
 );
@@ -207,7 +207,7 @@ local top10ContMemOBOManagement = genericGraphLegendPanel('Top 10 Management Clu
 
 local top10ContCPUOBOManagement = genericGraphLegendPanel('Top 10 Management Cluster OBO NS Pods CPU', 'Cluster Prometheus', 'percent').addTarget(
   prometheus.target(
-    'topk(10,irate(container_cpu_usage_seconds_total{namespace="openshift-observability-operator",container!="POD",name!=""}[1m])*100)',
+    'topk(10,irate(container_cpu_usage_seconds_total{namespace="openshift-observability-operator",container!="POD",name!=""}[2m])*100)',
     legendFormat='{{ pod }}/{{ container }}',
   )
 );
@@ -221,7 +221,7 @@ local top10ContMemHypershiftManagement = genericGraphLegendPanel('Top 10 Managem
 
 local top10ContCPUHypershiftManagement = genericGraphLegendPanel('Top 10 Management Cluster Hypershift NS Pods CPU', 'Cluster Prometheus', 'percent').addTarget(
   prometheus.target(
-    'topk(10,irate(container_cpu_usage_seconds_total{namespace="hypershift",container!="POD",name!=""}[1m])*100)',
+    'topk(10,irate(container_cpu_usage_seconds_total{namespace="hypershift",container!="POD",name!=""}[2m])*100)',
     legendFormat='{{ pod }}/{{ container }}',
   )
 );
@@ -341,7 +341,7 @@ local request_duration_99th_quantile_by_resource = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[1m])) by(resource, namespace, verb, le))',
+    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[2m])) by(resource, namespace, verb, le))',
     legendFormat='{{verb}}:{{resource}}/{{namespace}}',
   )
 );
@@ -665,7 +665,7 @@ local mgmt_heartbeat_failures = grafana.graphPanel.new(
   )
 ).addTarget(
   prometheus.target(
-    'etcd_server_health_failures{namespace=~"openshift-etcd",pod=~"$pod"}',
+    'etcd_server_health_failures{namespace=~"openshift-etcd"}',
     legendFormat='{{namespace}} - {{ pod }} health failures',
   )
 );
@@ -688,12 +688,12 @@ local mgmt_key_operations = grafana.graphPanel.new(
   ],
 }.addTarget(
   prometheus.target(
-    'rate(etcd_debugging_mvcc_put_total{namespace=~"openshift-etcd"}[2m])',
+    'rate(etcd_mvcc_put_total{namespace=~"openshift-etcd"}[2m])',
     legendFormat='{{namespace}} - {{ pod }} puts/s',
   )
 ).addTarget(
   prometheus.target(
-    'rate(etcd_debugging_mvcc_delete_total{namespace=~"openshift-etcd"}[2m])',
+    'rate(etcd_mvcc_delete_total{namespace=~"openshift-etcd"}[2m])',
     legendFormat='{{namespace}} - {{ pod }} deletes/s',
   )
 );
@@ -1095,7 +1095,7 @@ local keys = grafana.graphPanel.new(
   datasource='OBO',
 ).addTarget(
   prometheus.target(
-    'etcd_debugging_mvcc_keys_total{namespace=~"$namespace",pod=~"$pod"}',
+    'etcd_debugging_mvcc_keys_total{namespace=~"$namespace"}',
     legendFormat='{{namespace}} - {{ pod }} Num keys',
   )
 );
@@ -1105,7 +1105,7 @@ local compacted_keys = grafana.graphPanel.new(
   datasource='OBO',
 ).addTarget(
   prometheus.target(
-    'etcd_debugging_mvcc_db_compaction_keys_total{namespace=~"$namespace",pod=~"$pod"}',
+    'etcd_debugging_mvcc_db_compaction_keys_total{namespace=~"$namespace"}',
     legendFormat='{{namespace}} - {{ pod }} keys compacted',
   )
 );
@@ -1115,12 +1115,12 @@ local heartbeat_failures = grafana.graphPanel.new(
   datasource='OBO',
 ).addTarget(
   prometheus.target(
-    'etcd_server_heartbeat_send_failures_total{namespace=~"$namespace",pod=~"$pod"}',
+    'etcd_server_heartbeat_send_failures_total{namespace=~"$namespace"}',
     legendFormat='{{namespace}} - {{ pod }} heartbeat failures',
   )
 ).addTarget(
   prometheus.target(
-    'etcd_server_health_failures{namespace=~"$namespace",pod=~"$pod"}',
+    'etcd_server_health_failures{namespace=~"$namespace"}',
     legendFormat='{{namespace}} - {{ pod }} health failures',
   )
 );
@@ -1143,12 +1143,12 @@ local key_operations = grafana.graphPanel.new(
   ],
 }.addTarget(
   prometheus.target(
-    'rate(etcd_debugging_mvcc_put_total{namespace=~"$namespace",pod=~"$pod"}[2m])',
+    'rate(etcd_mvcc_put_total{namespace=~"$namespace"}[2m])',
     legendFormat='{{namespace}} - {{ pod }} puts/s',
   )
 ).addTarget(
   prometheus.target(
-    'rate(etcd_debugging_mvcc_delete_total{namespace=~"$namespace",pod=~"$pod"}[2m])',
+    'rate(etcd_mvcc_delete_total{namespace=~"$namespace"}[2m])',
     legendFormat='{{namespace}} - {{ pod }} deletes/s',
   )
 );
@@ -1170,12 +1170,12 @@ local slow_operations = grafana.graphPanel.new(
   ],
 }.addTarget(
   prometheus.target(
-    'delta(etcd_server_slow_apply_total{namespace=~"$namespace",pod=~"$pod"}[2m])',
+    'delta(etcd_server_slow_apply_total{namespace=~"$namespace"}[2m])',
     legendFormat='{{namespace}} - {{ pod }} slow applies',
   )
 ).addTarget(
   prometheus.target(
-    'delta(etcd_server_slow_read_indexes_total{namespace=~"$namespace",pod=~"$pod"}[2m])',
+    'delta(etcd_server_slow_read_indexes_total{namespace=~"$namespace"}[2m])',
     legendFormat='{{namespace}} - {{ pod }} slow read indexes',
   )
 );
@@ -1271,7 +1271,7 @@ local request_duration_99th_quantile = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[1m])) by(verb,le))',
+    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[2m])) by(verb,le))',
     legendFormat='{{verb}}',
   )
 );
@@ -1289,7 +1289,7 @@ local request_rate_by_instance = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",code=~"$code",verb=~"$verb"}[1m])) by(instance)',
+    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",code=~"$code",verb=~"$verb"}[2m])) by(instance)',
     legendFormat='{{instance}}',
   )
 );
@@ -1307,7 +1307,7 @@ local request_duration_99th_quantile_by_resource = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[1m])) by(resource,le))',
+    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[2m])) by(resource,le))',
     legendFormat='{{resource}}',
   )
 );
@@ -1325,7 +1325,7 @@ local request_rate_by_resource = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",code=~"$code",verb=~"$verb"}[1m])) by(resource)',
+    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",code=~"$code",verb=~"$verb"}[2m])) by(resource)',
     legendFormat='{{resource}}',
   )
 );
@@ -1335,12 +1335,12 @@ local request_duration_read_write = grafana.graphPanel.new(
   datasource='OBO',
 ).addTarget(
   prometheus.target(
-    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",verb=~"LIST|GET"}[1m])) by(le))',
+    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",verb=~"LIST|GET"}[2m])) by(le))',
     legendFormat='read',
   )
 ).addTarget(
   prometheus.target(
-    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",verb=~"POST|PUT|PATCH|UPDATE|DELETE"}[1m])) by(le))',
+    'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{namespace=~"$namespace",resource=~"$resource",verb=~"POST|PUT|PATCH|UPDATE|DELETE"}[2m])) by(le))',
     legendFormat='write',
   )
 );
@@ -1351,12 +1351,12 @@ local request_rate_read_write = grafana.graphPanel.new(
   datasource='OBO',
 ).addTarget(
   prometheus.target(
-    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",verb=~"LIST|GET"}[1m]))',
+    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",verb=~"LIST|GET"}[2m]))',
     legendFormat='read',
   )
 ).addTarget(
   prometheus.target(
-    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",verb=~"POST|PUT|PATCH|UPDATE|DELETE"}[1m]))',
+    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",verb=~"POST|PUT|PATCH|UPDATE|DELETE"}[2m]))',
     legendFormat='write',
   )
 );
@@ -1368,7 +1368,7 @@ local requests_dropped_rate = grafana.graphPanel.new(
   description='Number of requests dropped with "Try again later" response',
 ).addTarget(
   prometheus.target(
-    'sum(rate(apiserver_dropped_requests_total{namespace=~"$namespace"}[1m])) by (requestKind)',
+    'sum(rate(apiserver_dropped_requests_total{namespace=~"$namespace"}[2m])) by (requestKind)',
   )
 );
 
@@ -1379,7 +1379,7 @@ local requests_terminated_rate = grafana.graphPanel.new(
   description='Number of requests which apiserver terminated in self-defense',
 ).addTarget(
   prometheus.target(
-    'sum(rate(apiserver_request_terminations_total{namespace=~"$namespace",resource=~"$resource",code=~"$code"}[1m])) by(component)',
+    'sum(rate(apiserver_request_terminations_total{namespace=~"$namespace",resource=~"$resource",code=~"$code"}[2m])) by(component)',
   )
 );
 
@@ -1396,7 +1396,7 @@ local requests_status_rate = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",verb=~"$verb",code=~"$code"}[1m])) by(code)',
+    'sum(rate(apiserver_request_total{namespace=~"$namespace",resource=~"$resource",verb=~"$verb",code=~"$code"}[2m])) by(code)',
     legendFormat='{{code}}'
   )
 );
@@ -1443,7 +1443,7 @@ local pf_requests_rejected = grafana.graphPanel.new(
   description='Number of requests rejected by API Priority and Fairness system',
 ).addTarget(
   prometheus.target(
-    'sum(rate(apiserver_flowcontrol_rejected_requests_total{namespace=~"$namespace"}[1m])) by (reason)',
+    'sum(rate(apiserver_flowcontrol_rejected_requests_total{namespace=~"$namespace"}[2m])) by (reason)',
   )
 );
 
@@ -1461,7 +1461,7 @@ local response_size_99th_quartile = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'histogram_quantile(0.99, sum(rate(apiserver_response_sizes_bucket{namespace=~"$namespace",resource=~"$resource",verb=~"$verb"}[1m])) by(instance,le))',
+    'histogram_quantile(0.99, sum(rate(apiserver_response_sizes_bucket{namespace=~"$namespace",resource=~"$resource",verb=~"$verb"}[2m])) by(instance,le))',
     legendFormat='{{instance}}',
   )
 );
@@ -1480,7 +1480,7 @@ local pf_request_queue_length = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_queue_length_after_enqueue_bucket{namespace=~"$namespace"}[1m])) by(flowSchema, priorityLevel, le))',
+    'histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_queue_length_after_enqueue_bucket{namespace=~"$namespace"}[2m])) by(flowSchema, priorityLevel, le))',
     legendFormat='{{flowSchema}}:{{priorityLevel}}',
   )
 );
@@ -1499,7 +1499,7 @@ local pf_request_wait_duration_99th_quartile = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{namespace=~"$namespace"}[1m])) by(flowSchema, priorityLevel, le))',
+    'histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{namespace=~"$namespace"}[2m])) by(flowSchema, priorityLevel, le))',
     legendFormat='{{flowSchema}}:{{priorityLevel}}',
   )
 );
@@ -1518,7 +1518,7 @@ local pf_request_execution_duration = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{namespace=~"$namespace"}[1m])) by(flowSchema, priorityLevel, le))',
+    'histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{namespace=~"$namespace"}[2m])) by(flowSchema, priorityLevel, le))',
     legendFormat='{{flowSchema}}:{{priorityLevel}}',
   )
 );
@@ -1537,7 +1537,7 @@ local pf_request_dispatch_rate = grafana.graphPanel.new(
   legend_hideZero=true,
 ).addTarget(
   prometheus.target(
-    'sum(rate(apiserver_flowcontrol_dispatched_requests_total{namespace=~"$namespace"}[1m])) by(flowSchema,priorityLevel)',
+    'sum(rate(apiserver_flowcontrol_dispatched_requests_total{namespace=~"$namespace"}[2m])) by(flowSchema,priorityLevel)',
     legendFormat='{{flowSchema}}:{{priorityLevel}}',
   )
 );
@@ -1599,19 +1599,6 @@ grafana.dashboard.new(
   },
 )
 
-.addTemplate(
-  grafana.template.new(
-    'pod',
-    'Cluster Prometheus',
-    'label_values({pod=~"etcd.*", namespace="$namespace"}, pod)',
-    refresh=1,
-  ) {
-    type: 'query',
-    multi: true,
-    includeAll: false,
-  }
-)
-
 .addTemplate(
   grafana.template.new(
     'resource',