Skip to content

Commit

Permalink
Update CPU and memory queries in local monitoring manifest
Browse files Browse the repository at this point in the history
  • Loading branch information
shreyabiradar07 committed Aug 22, 2024
1 parent 06ba1ff commit 499a252
Showing 1 changed file with 64 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@ slo:

aggregation_functions:
- function: 'avg'
query: 'avg(kube_pod_container_resource_requests{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", container="$CONTAINER_NAME$", namespace="$NAMESPACE", resource="cpu", unit="core"})'
query: 'avg by(container, namespace) (kube_pod_container_resource_requests{container!='', container!="", container!="POD", pod!="", resource="cpu", unit="core" ,namespace="$NAMESPACE$",container="$CONTAINER_NAME$"})'

# Show sum of cpu requests in bytes for a container in a deployment
- function: 'sum'
query: 'sum(kube_pod_container_resource_requests{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", container="$CONTAINER_NAME$", namespace="$NAMESPACE", resource="cpu", unit="core"})'
query: 'sum by(container, namespace) (kube_pod_container_resource_requests{container!='', container!="", container!="POD", pod!="", resource="cpu", unit="core" ,namespace="$NAMESPACE$",container="$CONTAINER_NAME$"})'

- function: 'min'
query: 'min by(container, namespace) (kube_pod_container_resource_requests{container!='', container!="", container!="POD", pod!="", resource="cpu", unit="core" ,namespace="$NAMESPACE$",container="$CONTAINER_NAME$"})'

- function: 'max'
query: 'max by(container, namespace) (kube_pod_container_resource_requests{container!='', container!="", container!="POD", pod!="", resource="cpu", unit="core" ,namespace="$NAMESPACE$",container="$CONTAINER_NAME$"})'

# CPU Limit
# Show cpu limits in bytes for a container in a deployment
Expand All @@ -39,11 +44,17 @@ slo:

aggregation_functions:
- function: avg
query: 'avg(kube_pod_container_resource_limits{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", container="$CONTAINER_NAME$", namespace="$NAMESPACE", resource="cpu", unit="core"})'
query: 'avg by(container,namespace) (kube_pod_container_resource_limits{container!='', container!="", container!="POD", pod!="", resource="cpu", unit="core",namespace="$NAMESPACE$",container="$CONTAINER_NAME$"})'

# Show sum of cpu limits in bytes for a container in a deployment
- function: sum
query: 'sum(kube_pod_container_resource_limits{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", container="$CONTAINER_NAME$", namespace="$NAMESPACE$", resource="cpu", unit="core"})'
query: 'sum by(container,namespace) (kube_pod_container_resource_limits{container!='',container!="", container!="POD", pod!="", resource="cpu", unit="core",namespace="$NAMESPACE$",container="$CONTAINER_NAME$"})'

- function: 'max'
query: 'max by(container,namespace) (kube_pod_container_resource_limits{container!='', container!="", container!="POD", pod!="", resource="cpu", unit="core",namespace="$NAMESPACE$",container="$CONTAINER_NAME$"})'

- function: 'max'
query: 'min by(container,namespace) (kube_pod_container_resource_limits{container!='', container!="", container!="POD", pod!="", resource="cpu", unit="core",namespace="$NAMESPACE$",container="$CONTAINER_NAME$"})'


# CPU Usage
Expand All @@ -65,45 +76,45 @@ slo:
# For openshift versions <=4.8
aggregation_functions:
- function: avg
query: 'avg(avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container=$CONTAINER_NAME$”}[15m]))'
query: 'avg by(container, namespace)(avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{container!='', container!="POD", pod!="",namespace="$NAMESPACE$",container="$CONTAINER_NAME$" }[$MEASUREMENT_DURATION_IN_MIN$m]))'
versions: "<=4.8"

# For openshift versions >=4.9
- function: avg
query: 'avg(avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container=$CONTAINER_NAME$”}[15m]))'
query: 'avg by(container, namespace)(avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!='', container!="POD", pod!="",namespace="$NAMESPACE$",container="$CONTAINER_NAME$" }[$MEASUREMENT_DURATION_IN_MIN$m]))'
versions: ">4.9"

# Approx minimum CPU per container in a deployment
# For openshift versions <=4.8
- function: min
query: 'min(min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container="$CONTAINER_NAME$"}[15m]))'
query: 'min by(container, namespace)(min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\" }[$MEASUREMENT_DURATION_IN_MIN$m]))'
versions: "<=4.8"

# For openshift versions >=4.9
- function: min
query: 'min(min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container="$CONTAINER_NAME$"}[15m]))'
query: 'min by(container, namespace)(min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m]))'
versions: ">4.9"

# Approx maximum CPU per container in a deployment
# For openshift versions <=4.8
- function: max
query: 'max(max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container="$CONTAINER_NAME$"}[15m]))'
query: 'max by(container, namespace)(max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\" }[$MEASUREMENT_DURATION_IN_MIN$m]))'
versions: "<=4.8"

# For openshift versions >=4.9
- function: max
query: 'max(max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container="$CONTAINER_NAME$"}[15m]))'
query: 'max by(container, namespace)(max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\" }[$MEASUREMENT_DURATION_IN_MIN$m]))'
versions: ">4.9"

# Sum of CPU usage for a container in all pods of a deployment
# For openshift versions <=4.8
- function: sum
query: 'sum(avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container="$CONTAINER_NAME$"}[15m]))'
query: 'sum by(container, namespace)(avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\" }[$MEASUREMENT_DURATION_IN_MIN$m]))'
versions: "<=4.8"

# For openshift versions >=4.9
- function: sum
query: 'sum(avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container="$CONTAINER_NAME$"}[15m]))'
query: 'sum by(container, namespace)(avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\" }[$MEASUREMENT_DURATION_IN_MIN$m]))'
versions: ">4.9"


Expand All @@ -116,15 +127,19 @@ slo:
aggregation_functions:
# Average CPU throttling per container in a deployment
- function: avg
query: 'avg(rate(container_cpu_cfs_throttled_seconds_total{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container=$CONTAINER_NAME$”}[15m]))'
query: 'avg by(container,namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m]))'

# Maximum CPU throttling per container in a deployment
- function: max
query: 'max(rate(container_cpu_cfs_throttled_seconds_total{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container=”$CONTAINER_NAME$”}[15m]))'
query: 'max by(container,namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m])))'

# Min of CPU throttling for a container in all pods of a deployment
- function: min
query: 'min by(container,namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m])'

# Sum of CPU throttling for a container in all pods of a deployment
- function: sum
query: 'sum(rate(container_cpu_cfs_throttled_seconds_total{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace="$NAMESPACE$", container=$CONTAINER_NAME$”}[15m]))'
query: 'sum by(container,namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!='', container!="POD", pod!="",namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m]))'



Expand All @@ -139,11 +154,17 @@ slo:

aggregation_functions:
- function: avg
query: 'avg(kube_pod_container_resource_requests{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", container=”$CONTAINER_NAME$”, namespace=”$NAMESPACE”, resource="memory", unit="byte"})'
query: 'avg by(container, namespace) (kube_pod_container_resource_requests{container!='', container!="POD", pod!="", resource="memory", unit="byte" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"})'

# Show sum of memory requests in bytes for a container in a deployment
- function: sum
query: 'sum(kube_pod_container_resource_requests{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", container=”$CONTAINER_NAME$”, namespace=”$NAMESPACE”, resource="memory", unit="byte"})'
query: 'sum by(container, namespace) (kube_pod_container_resource_requests{container!='', container!="POD", pod!="", resource="memory", unit="byte" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"})'

- function: max
query: 'max by(container, namespace) (kube_pod_container_resource_requests{container!='', container!="POD", pod!="", resource="memory", unit="byte" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"})'

- function: min
query: 'min by(container, namespace) (kube_pod_container_resource_requests{container!='', container!="POD", pod!="", resource="memory", unit="byte" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"})'


# Memory Limit
Expand All @@ -155,12 +176,17 @@ slo:

aggregation_functions:
- function: avg
query: 'avg(kube_pod_container_resource_limits{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", container="$CONTAINER_NAME$", namespace="$NAMESPACE", resource="memory", unit="byte"})'
query: 'avg by(container,namespace) (kube_pod_container_resource_limits{container!='', container!="POD", pod!="", resource="memory", unit="byte" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"})'

# Show sum of memory limits in bytes for a container in a deployment
- function: sum
query: 'sum(kube_pod_container_resource_limits{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", container=”$CONTAINER_NAME$”, namespace=”$NAMESPACE”, resource="memory", unit="byte"})'
query: 'sum by(container,namespace) (kube_pod_container_resource_limits{container!='', container!="POD", pod!="", resource="memory", unit="byte" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"})'

- function: max
query: 'max by(container,namespace) (kube_pod_container_resource_limits{container!='', container!="POD", pod!="", resource="memory", unit="byte" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"})'

- function: min
query: 'min by(container,namespace) (kube_pod_container_resource_limits{container!='', container!="POD", pod!="", resource="memory", unit="byte" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"})'

# Memory Usage
# Average memory per container in a deployment
Expand All @@ -171,19 +197,19 @@ slo:

aggregation_functions:
- function: avg
query: 'avg(avg_over_time(container_memory_working_set_bytes{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace=$NAMESPACE$, container=$CONTAINER_NAME$”}[15m]))'
query: 'avg by(container, namespace) (avg_over_time(container_memory_working_set_bytes{container!='', container!="POD", pod!="", namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m]))'

# Approx minimum memory per container in a deployment
- function: min
query: 'min(min_over_time(container_memory_working_set_bytes{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace=$NAMESPACE$, container="$CONTAINER_NAME$"}[15m]))'
query: 'min by(container, namespace) (min_over_time(container_memory_working_set_bytes{container!='', container!="POD", pod!="", namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\" }[$MEASUREMENT_DURATION_IN_MIN$m])'

# Approx maximum memory per container in a deployment
- function: max
query: 'max(max_over_time(container_memory_working_set_bytes{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace=$NAMESPACE$, container="$CONTAINER_NAME$"}[15m]))'
query: 'max by(container, namespace) (max_over_time(container_memory_working_set_bytes{container!='', container!="POD", pod!="", namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m]))'

# Sum of memory usage for a contianer in all pods of a deployment
- function: sum
query: 'sum(avg_over_time(container_memory_working_set_bytes{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace=$NAMESPACE$, container="$CONTAINER_NAME$"}[15m]))'
query: 'sum by(container, namespace) (avg_over_time(container_memory_working_set_bytes{container!='', container!="POD", pod!="", namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m]))'


# 2.4 Memory RSS
Expand All @@ -195,17 +221,28 @@ slo:
aggregation_functions:
# Average memory RSS per container in a deployment
- function: avg
query: 'avg(avg_over_time(container_memory_rss{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace=$NAMESPACE$, container=$CONTAINER_NAME$”}[15m]))'
query: 'avg by(container, namespace) (avg_over_time(container_memory_rss{container!='', container!="POD", pod!="" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m]))'

# Approx minimum memory RSS per container in a deployment
- function: min
query: 'min(min_over_time(container_memory_rss{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace=$NAMESPACE$, container="$CONTAINER_NAME$"}[15m]))'
query: 'min by(container, namespace) (min_over_time(container_memory_rss{container!='', container!="POD", pod!="" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m])'


# Approx maximum memory RSS per container in a deployment
- function: max
query: 'max(max_over_time(container_memory_rss{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace=$NAMESPACE$, container="$CONTAINER_NAME$"}[15m]))'
query: 'max by(container, namespace) (max_over_time(container_memory_rss{container!='', container!="POD", pod!="" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m]))'

# Sum of memory RSS for a contianer in all pods of a deployment
- function: sum
query: 'sum(avg_over_time(container_memory_rss{pod=~"$DEPLOYMENT_NAME$-[^-]*-[^-]*$", namespace=$NAMESPACE$, container=”$CONTAINER_NAME$”}[15m]))'
query: 'sum by(container, namespace) (avg_over_time(container_memory_rss{container!='', container!="POD", pod!="" ,namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"}[$MEASUREMENT_DURATION_IN_MIN$m]))'


# Container Last Active Timestamp
- name: maxDate
datasource: prometheus
value_type: "double"
kubernetes_object: "container"

aggregation_functions:
- function: max
query: 'max by(namespace,container) (last_over_time((timestamp(container_cpu_usage_seconds_total{namespace=\"$NAMESPACE$\",container=\"$CONTAINER_NAME$\"} > 0))[15d:]))'

0 comments on commit 499a252

Please sign in to comment.