Skip to content

Commit

Permalink
Add alerts for internal metrics (#1412)
Browse files Browse the repository at this point in the history
  • Loading branch information
marctc authored Nov 29, 2024
1 parent 26bd308 commit 4174fa5
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 0 deletions.
3 changes: 3 additions & 0 deletions ops/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Beyla monitoring mixins

This mixin includes Grafana Dashboards and Alerts for Beyla.
97 changes: 97 additions & 0 deletions ops/alerts/alerts.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Prometheus alerting rules for Beyla's internal metrics.
//
// The object is meant to be merged into a monitoring mixin: it appends one
// rule group, 'beyla_internal_alerts', to `prometheusAlerts.groups`.
//
// Conventions used by the rules below:
//   * Export error-rate alerts come in pairs that share an alert name and
//     differ only in threshold and `severity` label (warning > 2%,
//     critical > 5%), both sustained for 15 minutes.
//   * Every expression aggregates by `cluster`, and the descriptions render
//     that label through the `{{ $labels.cluster }}` template.
{
  prometheusAlerts+: {
    groups+: [
      {
        name: 'beyla_internal_alerts',
        rules: [
          // Percentage of failed OTEL metric exports per cluster, warning level.
          {
            alert: 'BeylaOTELMetricExportHighErrorRate',
            expr: |||
              100 * sum(rate(beyla_otel_metric_export_errors_total[1m])) by (cluster)
                /
              sum(rate(beyla_otel_metric_exports_total[1m])) by (cluster)
                > 2
            |||,
            'for': '15m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Beyla has a high error rate for OTEL metric exports',
              description: 'Beyla in {{ $labels.cluster }} is experiencing {{ printf "%.2f" $value }}% errors for OTEL metric exports.',
            },
          },
          // Same expression as above with the critical threshold.
          {
            alert: 'BeylaOTELMetricExportHighErrorRate',
            expr: |||
              100 * sum(rate(beyla_otel_metric_export_errors_total[1m])) by (cluster)
                /
              sum(rate(beyla_otel_metric_exports_total[1m])) by (cluster)
                > 5
            |||,
            'for': '15m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              summary: 'Beyla has a high error rate for OTEL metric exports',
              description: 'Beyla in {{ $labels.cluster }} is experiencing {{ printf "%.2f" $value }}% errors for OTEL metric exports.',
            },
          },
          // Percentage of failed OTEL trace exports per cluster, warning level.
          {
            alert: 'BeylaOTELTraceExportHighErrorRate',
            expr: |||
              100 * sum(rate(beyla_otel_trace_export_errors_total[1m])) by (cluster)
                /
              sum(rate(beyla_otel_trace_exports_total[1m])) by (cluster)
                > 2
            |||,
            'for': '15m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Beyla has a high error rate for OTEL trace exports',
              description: 'Beyla in {{ $labels.cluster }} is experiencing {{ printf "%.2f" $value }}% errors for OTEL trace exports.',
            },
          },
          // Same expression as above with the critical threshold.
          {
            alert: 'BeylaOTELTraceExportHighErrorRate',
            expr: |||
              100 * sum(rate(beyla_otel_trace_export_errors_total[1m])) by (cluster)
                /
              sum(rate(beyla_otel_trace_exports_total[1m])) by (cluster)
                > 5
            |||,
            'for': '15m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              summary: 'Beyla has a high error rate for OTEL trace exports',
              description: 'Beyla in {{ $labels.cluster }} is experiencing {{ printf "%.2f" $value }}% errors for OTEL trace exports.',
            },
          },
          // Fires when a cluster reports more than one instrumented process
          // (excluding Beyla itself) while at least one of the export/flush
          // series is missing entirely.
          //
          // absent() applied to an aggregation returns a single sample with
          // an empty label set, so the join must be `and on()`: a plain `and`
          // would require the label-less absent() sample to carry the same
          // `cluster` label as the left-hand side and would never match.
          // The left-hand side's labels (including `cluster`) are preserved.
          // NOTE(review): the absence check is therefore global, not
          // per-cluster — confirm this is the intended scope.
          {
            alert: 'BeylaInstrumentedProcessesNoTelemetry',
            expr: |||
              sum by(cluster) (beyla_instrumented_processes{process_name!="beyla"}) > 1 and on()
                (absent(sum by(cluster) (rate(beyla_otel_metric_exports_total[5m]))) or
                 absent(sum by(cluster) (rate(beyla_otel_trace_exports_total[5m]))) or
                 absent(sum by(cluster) (rate(beyla_ebpf_tracer_flushes_sum[5m]))) or
                 absent(sum by(cluster) (rate(beyla_prometheus_http_requests_total[5m]))))
            |||,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Beyla has instrumented processes without metrics or traces',
              description: 'Beyla in {{ $labels.cluster }} has more than 1 instrumented process, but no metrics or traces have been exported in the last 10 minutes.',
            },
          },
        ],
      },
    ],
  },
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
15 changes: 15 additions & 0 deletions ops/dashboards/dashboards.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Grafana dashboards shipped with the Beyla mixin.
//
// Each dashboard JSON is imported and given a `uid` computed as the MD5 of
// its file name, overriding whatever `uid` the JSON itself declares.

// Returns `dashboard` with its `uid` replaced by the MD5 hash of `fileName`.
local withFileNameUid(fileName, dashboard) = dashboard {
  uid: std.md5(fileName),
};

{
  grafanaDashboardFolder+: 'beyla',
  grafanaDashboards+: {
    'beyla_debug.json': withFileNameUid('beyla_debug.json', import './beyla_debug.json'),
    'application.json': withFileNameUid('application.json', import './application.json'),
    // TODO: Include this dashboard only if application_process is enabled
    'application_process.json': withFileNameUid('application_process.json', import './application_process.json'),
  },
}
2 changes: 2 additions & 0 deletions ops/mixin.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Mixin entry point: merges the alert rules and the dashboards into a
// single object, with the dashboards object applied on top of the alerts.
local alerts = import './alerts/alerts.libsonnet';
local dashboards = import './dashboards/dashboards.libsonnet';

alerts + dashboards

0 comments on commit 4174fa5

Please sign in to comment.