Skip to content

Commit

Permalink
add metrics for individual OVN components (#138)
Browse files Browse the repository at this point in the history
  • Loading branch information
mohit-sheth authored Dec 10, 2024
1 parent f374918 commit 8c0b3d8
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 9 deletions.
33 changes: 33 additions & 0 deletions assets/ocp-performance/queries.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,39 @@ local generateTimeSeriesQuery(query, legend) = [
query():
generateTimeSeriesQuery('topk(10, sum(container_memory_rss{pod=~"ovnkube-node-.*",namespace="openshift-ovn-kubernetes",container="ovn-controller"}) by (pod,node))', '{{pod}} - {{node}}'),
},
ovnKubeControllerCPU: {
  // Top 10 (pod, node) pairs by CPU of the `ovnkube-controller` container in
  // the openshift-ovn-kubernetes namespace: irate of
  // container_cpu_usage_seconds_total over [$interval], scaled by 100.
  // `$interval` is presumably a dashboard template variable - confirm.
  query():
    local expr = 'topk(10, sum( irate(container_cpu_usage_seconds_total{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="ovnkube-controller"}[$interval])*100) by (pod,node) )';
    local legend = '{{pod}} - {{node}}';
    generateTimeSeriesQuery(expr, legend),
},
ovnKubeControllerMem: {
  // Top 10 (pod, node) pairs by resident memory (container_memory_rss) of the
  // `ovnkube-controller` container in the openshift-ovn-kubernetes namespace.
  // The pod selector is `ovnkube-.*`, the same one ovnKubeControllerCPU uses,
  // so the CPU and memory panels cover the same set of pods; the `container`
  // label does the actual narrowing.
  query():
    generateTimeSeriesQuery(
      'topk(10, sum(container_memory_rss{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="ovnkube-controller"}) by (pod,node))',
      '{{pod}} - {{node}}'
    ),
},
topNbdbCPU: {
  // Top 10 (pod, node) pairs by CPU of the `nbdb` container in the
  // openshift-ovn-kubernetes namespace: irate of
  // container_cpu_usage_seconds_total over [$interval], scaled by 100.
  query():
    local expr = 'topk(10, sum( irate(container_cpu_usage_seconds_total{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="nbdb"}[$interval])*100) by (pod,node) )';
    local legend = '{{pod}} - {{node}}';
    generateTimeSeriesQuery(expr, legend),
},
topNbdbMem: {
  // Top 10 (pod, node) pairs by resident memory (container_memory_rss) of the
  // `nbdb` container in the openshift-ovn-kubernetes namespace.
  // The pod selector is `ovnkube-.*`, the same one topNbdbCPU uses, so both
  // nbdb panels cover the same pods regardless of which ovnkube-* pod hosts
  // the container.
  // NOTE(review): which pod hosts nbdb may differ between cluster versions -
  // confirm against the target clusters.
  query():
    generateTimeSeriesQuery(
      'topk(10, sum(container_memory_rss{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="nbdb"}) by (pod,node))',
      '{{pod}} - {{node}}'
    ),
},
topNorthdCPU: {
  // Top 10 (pod, node) pairs by CPU of the `northd` container in the
  // openshift-ovn-kubernetes namespace: irate of
  // container_cpu_usage_seconds_total over [$interval], scaled by 100.
  query():
    local expr = 'topk(10, sum( irate(container_cpu_usage_seconds_total{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="northd"}[$interval])*100) by (pod,node) )';
    local legend = '{{pod}} - {{node}}';
    generateTimeSeriesQuery(expr, legend),
},
topNorthdMem: {
  // Top 10 (pod, node) pairs by resident memory (container_memory_rss) of the
  // `northd` container in the openshift-ovn-kubernetes namespace.
  // The pod selector is `ovnkube-.*`, the same one topNorthdCPU uses, so both
  // northd panels cover the same pods regardless of which ovnkube-* pod hosts
  // the container.
  // NOTE(review): which pod hosts northd may differ between cluster versions -
  // confirm against the target clusters.
  query():
    generateTimeSeriesQuery(
      'topk(10, sum(container_memory_rss{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="northd"}) by (pod,node))',
      '{{pod}} - {{node}}'
    ),
},
topSbdbCPU: {
  // Top 10 (pod, node) pairs by CPU of the `sbdb` container in the
  // openshift-ovn-kubernetes namespace: irate of
  // container_cpu_usage_seconds_total over [$interval], scaled by 100.
  query():
    local expr = 'topk(10, sum( irate(container_cpu_usage_seconds_total{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="sbdb"}[$interval])*100) by (pod,node) )';
    local legend = '{{pod}} - {{node}}';
    generateTimeSeriesQuery(expr, legend),
},
topSbdbMem: {
  // Top 10 (pod, node) pairs by resident memory (container_memory_rss) of the
  // `sbdb` container in the openshift-ovn-kubernetes namespace.
  // The pod selector is `ovnkube-.*`, the same one topSbdbCPU uses, so both
  // sbdb panels cover the same pods regardless of which ovnkube-* pod hosts
  // the container.
  // NOTE(review): which pod hosts sbdb may differ between cluster versions -
  // confirm against the target clusters.
  query():
    generateTimeSeriesQuery(
      'topk(10, sum(container_memory_rss{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="sbdb"}) by (pod,node))',
      '{{pod}} - {{node}}'
    ),
},

promReplCpuUsage: {
query():
generateTimeSeriesQuery('sum(irate(container_cpu_usage_seconds_total{pod=~"prometheus-k8s-0",namespace!="",name!="",container="prometheus"}[$interval])) by (pod,container) * 100', '{{pod}}')
Expand Down
26 changes: 17 additions & 9 deletions templates/General/ocp-performance.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,23 @@ g.dashboard.new('Openshift Performance')
+ g.panel.row.withPanels([
panels.timeSeries.genericLegend('ovnkube-control-plane CPU Usage', 'percent', queries.ovnKubeControlPlaneCPU.query(), { x: 0, y: 1, w: 12, h: 8 }),
panels.timeSeries.genericLegend('ovnkube-control-plane Memory Usage', 'bytes', queries.ovnKubeControlPlaneMem.query(), { x: 12, y: 1, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 ovn-controller CPU Usage', 'percent', queries.topOvnControllerCPU.query(), { x: 0, y: 9, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 ovn-controller Memory Usage', 'bytes', queries.topOvnControllerMem.query(), { x: 12, y: 9, w: 12, h: 8 }),
panels.timeSeries.genericLegend('ovs-master CPU Usage', 'percent', queries.OVSCPU.query('$_master_node'), { x: 0, y: 17, w: 12, h: 8 }),
panels.timeSeries.genericLegend('ovs-master Memory Usage', 'bytes', queries.OVSMemory.query('$_master_node'), { x: 12, y: 17, w: 12, h: 8 }),
panels.timeSeries.genericLegend('ovs-worker CPU Usage', 'percent', queries.OVSCPU.query('$_worker_node'), { x: 0, y: 25, w: 12, h: 8 }),
panels.timeSeries.genericLegend('ovs-worker Memory Usage', 'bytes', queries.OVSMemory.query('$_worker_node'), { x: 12, y: 25, w: 12, h: 8 }),
panels.timeSeries.genericLegend('99% Pod Annotation Latency', 's', queries.ovnAnnotationLatency.query(), { x: 0, y: 33, w: 8, h: 8 }),
panels.timeSeries.genericLegend('99% CNI Request ADD Latency', 's', queries.ovnCNIAdd.query(), { x: 8, y: 41, w: 8, h: 8 }),
panels.timeSeries.genericLegend('99% CNI Request DEL Latency', 's', queries.ovnCNIDel.query(), { x: 16, y: 41, w: 8, h: 8 }),
panels.timeSeries.genericLegend('Top 10 ovnkube-controller CPU Usage', 'percent', queries.ovnKubeControllerCPU.query(), { x: 0, y: 10, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 ovnkube-controller Memory Usage', 'bytes', queries.ovnKubeControllerMem.query(), { x: 12, y: 10, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 ovn-controller CPU Usage', 'percent', queries.topOvnControllerCPU.query(), { x: 0, y: 18, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 ovn-controller Memory Usage', 'bytes', queries.topOvnControllerMem.query(), { x: 12, y: 18, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 nbdb CPU Usage', 'percent', queries.topNbdbCPU.query(), { x: 0, y: 26, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 nbdb Memory Usage', 'bytes', queries.topNbdbMem.query(), { x: 12, y: 26, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 northd CPU Usage', 'percent', queries.topNorthdCPU.query(), { x: 0, y: 34, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 northd Memory Usage', 'bytes', queries.topNorthdMem.query(), { x: 12, y: 34, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 sbdb CPU Usage', 'percent', queries.topSbdbCPU.query(), { x: 0, y: 42, w: 12, h: 8 }),
panels.timeSeries.genericLegend('Top 10 sbdb Memory Usage', 'bytes', queries.topSbdbMem.query(), { x: 12, y: 42, w: 12, h: 8 }),
panels.timeSeries.genericLegend('ovs-master CPU Usage', 'percent', queries.OVSCPU.query('$_master_node'), { x: 0, y: 50, w: 12, h: 8 }),
panels.timeSeries.genericLegend('ovs-master Memory Usage', 'bytes', queries.OVSMemory.query('$_master_node'), { x: 12, y: 50, w: 12, h: 8 }),
panels.timeSeries.genericLegend('ovs-worker CPU Usage', 'percent', queries.OVSCPU.query('$_worker_node'), { x: 0, y: 58, w: 12, h: 8 }),
panels.timeSeries.genericLegend('ovs-worker Memory Usage', 'bytes', queries.OVSMemory.query('$_worker_node'), { x: 12, y: 58, w: 12, h: 8 }),
panels.timeSeries.genericLegend('99% Pod Annotation Latency', 's', queries.ovnAnnotationLatency.query(), { x: 0, y: 66, w: 8, h: 8 }),
panels.timeSeries.genericLegend('99% CNI Request ADD Latency', 's', queries.ovnCNIAdd.query(), { x: 8, y: 66, w: 8, h: 8 }),
panels.timeSeries.genericLegend('99% CNI Request DEL Latency', 's', queries.ovnCNIDel.query(), { x: 16, y: 66, w: 8, h: 8 }),
]),
g.panel.row.new('Monitoring stack')
+ g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
Expand Down

0 comments on commit 8c0b3d8

Please sign in to comment.