Skip to content

Commit

Permalink
Update mixin dashboard (#4078)
Browse files Browse the repository at this point in the history
Update and rewrite the mixin dashboard to use the grafonnet ([1])
library.
Grafana has deprecated the Angular plugins ([2]) that grafonnet-lib
([3]) relies on, with removal planned for Grafana version 12.
Additionally, grafonnet-lib itself is deprecated and unmaintained in favor of
grafonnet.
Therefore, the mixin dashboard has been updated to use grafonnet.

[1]
https://github.com/grafana/grafonnet

[2]
https://grafana.com/docs/grafana/latest/developers/angular_deprecation/

[3]
https://github.com/grafana/grafonnet-lib

Signed-off-by: Jan Horstmann <[email protected]>
  • Loading branch information
janhorstmann authored Oct 29, 2024
1 parent d04ef60 commit bd70e73
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 138 deletions.
281 changes: 150 additions & 131 deletions doc/alertmanager-mixin/dashboards/overview.libsonnet
Original file line number Diff line number Diff line change
@@ -1,154 +1,173 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local prometheus = grafana.query.prometheus;
local variable = dashboard.variable;
local panel = grafana.panel;
local row = panel.row;

{
grafanaDashboards+:: {

local amQuerySelector = std.join(',', ['%s=~"$%s"' % [label, label] for label in std.split($._config.alertmanagerClusterLabels, ',')]),
local amNameDashboardLegend = std.join('/', ['{{%s}}' % [label] for label in std.split($._config.alertmanagerNameLabels, ',')]),

local alertmanagerClusterSelectorTemplates =
// Dashboard variable named 'datasource' of type 'prometheus', labelled
// 'Data Source', with 'Prometheus' as its current value; shown with label and value.
local datasource =
  local ds = variable.datasource;
  ds.new('datasource', 'prometheus')
  + ds.generalOptions.withLabel('Data Source')
  + ds.generalOptions.withCurrent('Prometheus')
  + ds.generalOptions.showOnDashboard.withLabelAndValue(),

local alertmanagerClusterSelectorVariables =
[
template.new(
name=label,
label=label,
datasource='$datasource',
query='label_values(alertmanager_alerts, %s)' % label,
current='',
refresh=2,
includeAll=false,
sort=1
)
variable.query.new(label)
+ variable.query.generalOptions.withLabel(label)
+ variable.query.withDatasourceFromVariable(datasource)
+ variable.query.queryTypes.withLabelValues(label, metric='alertmanager_alerts')
+ variable.query.generalOptions.withCurrent('')
+ variable.query.refresh.onTime()
+ variable.query.selectionOptions.withIncludeAll(false)
+ variable.query.withSort(type='alphabetical')
for label in std.split($._config.alertmanagerClusterLabels, ',')
],

local integrationTemplate =
template.new(
name='integration',
datasource='$datasource',
query='label_values(alertmanager_notifications_total{integration=~"%s"}, integration)' % $._config.alertmanagerCriticalIntegrationsRegEx,
current='all',
hide='2', // Always hide
refresh=2,
includeAll=true,
sort=1
),
// Query variable 'integration': the values of the `integration` label on
// alertmanager_notifications_total that match
// $._config.alertmanagerCriticalIntegrationsRegEx. It is never shown on the
// dashboard; the notification panels repeat over it.
local integrationVariable =
  variable.query.new('integration')
  + variable.query.withDatasourceFromVariable(datasource)
  + variable.query.queryTypes.withLabelValues('integration', metric='alertmanager_notifications_total{integration=~"%s"}' % $._config.alertmanagerCriticalIntegrationsRegEx)
  + variable.query.generalOptions.withCurrent('$__all')
  // Hide the variable using the query variable's own helper, consistent with
  // the rest of this chain (previously borrowed from variable.datasource).
  + variable.query.generalOptions.showOnDashboard.withNothing()
  + variable.query.refresh.onTime()
  + variable.query.selectionOptions.withIncludeAll(true)
  + variable.query.withSort(type='alphabetical'),

// Options mixed into every time series panel below: 'normal' stacking,
// fill opacity 10, no points, hidden legend, 'multi' tooltip, and the
// '$datasource' Prometheus datasource.
local panelTimeSeriesStdOptions =
  local ts = panel.timeSeries;
  local custom = ts.fieldConfig.defaults.custom;
  custom.stacking.withMode('normal')
  + custom.withFillOpacity(10)
  + custom.withShowPoints('never')
  + ts.options.legend.withShowLegend(false)
  + ts.options.tooltip.withMode('multi')
  + ts.queryOptions.withDatasource('prometheus', '$datasource'),

'alertmanager-overview.json':
local alerts =
graphPanel.new(
'Alerts',
description='current set of alerts stored in the Alertmanager',
datasource='$datasource',
span=6,
format='none',
stack=true,
fill=1,
legend_show=false,
)
.addTarget(prometheus.target('sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s' % $._config { amNameDashboardLegend: amNameDashboardLegend }));
// 'Alerts' panel: sum of alertmanager_alerts grouped by the configured
// cluster and name labels.
local ts = panel.timeSeries;
// Single substitution object for both the query and the legend format strings.
local fmt = $._config { amQuerySelector: amQuerySelector, amNameDashboardLegend: amNameDashboardLegend };
ts.new('Alerts')
+ ts.panelOptions.withDescription('current set of alerts stored in the Alertmanager')
+ ts.standardOptions.withUnit('none')
+ panelTimeSeriesStdOptions
+ ts.queryOptions.withTargets([
  prometheus.new(
    '$datasource',
    'sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % fmt,
  )
  + prometheus.withIntervalFactor(2)
  + prometheus.withLegendFormat('%(amNameDashboardLegend)s' % fmt),
]);

local alertsRate =
graphPanel.new(
'Alerts receive rate',
description='rate of successful and invalid alerts received by the Alertmanager',
datasource='$datasource',
span=6,
format='ops',
stack=true,
fill=1,
legend_show=false,
)
.addTarget(prometheus.target('sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Received' % $._config { amNameDashboardLegend: amNameDashboardLegend }))
.addTarget(prometheus.target('sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Invalid' % $._config { amNameDashboardLegend: amNameDashboardLegend }));
// 'Alerts receive rate' panel: per-second rate of received and of invalid
// alerts, each grouped by the configured cluster and name labels.
local ts = panel.timeSeries;
// Single substitution object for both the query and the legend format strings.
local fmt = $._config { amQuerySelector: amQuerySelector, amNameDashboardLegend: amNameDashboardLegend };
ts.new('Alerts receive rate')
+ ts.panelOptions.withDescription('rate of successful and invalid alerts received by the Alertmanager')
+ ts.standardOptions.withUnit('ops')
+ panelTimeSeriesStdOptions
+ ts.queryOptions.withTargets([
  prometheus.new(
    '$datasource',
    'sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % fmt,
  )
  + prometheus.withIntervalFactor(2)
  + prometheus.withLegendFormat('%(amNameDashboardLegend)s Received' % fmt),
  prometheus.new(
    '$datasource',
    'sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % fmt,
  )
  + prometheus.withIntervalFactor(2)
  + prometheus.withLegendFormat('%(amNameDashboardLegend)s Invalid' % fmt),
]);

local notifications =
graphPanel.new(
'$integration: Notifications Send Rate',
description='rate of successful and invalid notifications sent by the Alertmanager',
datasource='$datasource',
format='ops',
stack=true,
fill=1,
legend_show=false,
repeat='integration'
)
.addTarget(prometheus.target('sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Total' % $._config { amNameDashboardLegend: amNameDashboardLegend }))
.addTarget(prometheus.target('sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Failed' % $._config { amNameDashboardLegend: amNameDashboardLegend }));
// '$integration: Notifications Send Rate' panel, repeated once per value of
// the 'integration' variable: rate of all and of failed notifications.
local ts = panel.timeSeries;
// Single substitution object for both the query and the legend format strings.
local fmt = $._config { amQuerySelector: amQuerySelector, amNameDashboardLegend: amNameDashboardLegend };
ts.new('$integration: Notifications Send Rate')
+ ts.panelOptions.withDescription('rate of successful and invalid notifications sent by the Alertmanager')
+ ts.standardOptions.withUnit('ops')
+ panelTimeSeriesStdOptions
+ ts.panelOptions.withRepeat('integration')
+ ts.queryOptions.withTargets([
  prometheus.new(
    '$datasource',
    'sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % fmt,
  )
  + prometheus.withIntervalFactor(2)
  + prometheus.withLegendFormat('%(amNameDashboardLegend)s Total' % fmt),
  prometheus.new(
    '$datasource',
    'sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % fmt,
  )
  + prometheus.withIntervalFactor(2)
  + prometheus.withLegendFormat('%(amNameDashboardLegend)s Failed' % fmt),
]);

local notificationDuration =
graphPanel.new(
'$integration: Notification Duration',
description='latency of notifications sent by the Alertmanager',
datasource='$datasource',
format='s',
stack=false,
fill=1,
legend_show=false,
repeat='integration'
)
.addTarget(prometheus.target(
|||
histogram_quantile(0.99,
sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
)
||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend }
))
.addTarget(prometheus.target(
|||
histogram_quantile(0.50,
sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
)
||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend }
))
.addTarget(prometheus.target(
|||
sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
/
sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend }
));
// '$integration: Notification Duration' panel, repeated once per value of the
// 'integration' variable: 99th percentile, median and average notification
// latency in seconds.
panel.timeSeries.new('$integration: Notification Duration')
+ panel.timeSeries.panelOptions.withDescription('latency of notifications sent by the Alertmanager')
+ panel.timeSeries.standardOptions.withUnit('s')
+ panelTimeSeriesStdOptions
// The shared options stack series ('normal'); the three latency statistics
// must not be summed on top of each other, so turn stacking off here.
+ panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('none')
+ panel.timeSeries.panelOptions.withRepeat('integration')
+ panel.timeSeries.queryOptions.withTargets([
  prometheus.new(
    '$datasource',
    |||
      histogram_quantile(0.99,
        sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
      )
    ||| % $._config { amQuerySelector: amQuerySelector },
  )
  + prometheus.withIntervalFactor(2)
  + prometheus.withLegendFormat('%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
  prometheus.new(
    '$datasource',
    |||
      histogram_quantile(0.50,
        sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
      )
    ||| % $._config { amQuerySelector: amQuerySelector },
  )
  + prometheus.withIntervalFactor(2)
  + prometheus.withLegendFormat('%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
  // Average latency: rate of the latency sum divided by rate of the count.
  prometheus.new(
    '$datasource',
    |||
      sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
      /
      sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
    ||| % $._config { amQuerySelector: amQuerySelector },
  )
  + prometheus.withIntervalFactor(2)
  + prometheus.withLegendFormat('%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
]);

dashboard.new(
'%sOverview' % $._config.dashboardNamePrefix,
time_from='now-1h',
tags=($._config.dashboardTags),
timezone='utc',
refresh='30s',
graphTooltip='shared_crosshair',
uid='alertmanager-overview'
)
.addTemplate(
{
current: {
text: 'Prometheus',
value: 'Prometheus',
},
hide: 0,
label: 'Data Source',
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: '',
type: 'datasource',
},
)
.addTemplates(alertmanagerClusterSelectorTemplates)
.addTemplate(integrationTemplate)
.addRow(
row.new('Alerts')
.addPanel(alerts)
.addPanel(alertsRate)
)
.addRow(
row.new('Notifications')
.addPanel(notifications)
.addPanel(notificationDuration)
),
// Assemble the overview dashboard: metadata, template variables, and two rows
// ('Alerts', 'Notifications') laid out on a grid of 12-wide, 7-high panels.
dashboard.new('%sOverview' % $._config.dashboardNamePrefix)
+ dashboard.time.withFrom('now-1h')
+ dashboard.withTags($._config.dashboardTags)
+ dashboard.withTimezone('utc')
// withRefresh sets the dashboard's auto-refresh interval;
// timepicker.withRefreshIntervals would only change the choices offered in
// the refresh picker and leave auto-refresh off.
+ dashboard.withRefresh('30s')
+ dashboard.graphTooltip.withSharedCrosshair()
+ dashboard.withUid('alertmanager-overview')
+ dashboard.withVariables(
  [datasource]
  + alertmanagerClusterSelectorVariables
  + [integrationVariable]
)
+ dashboard.withPanels(
  grafana.util.grid.makeGrid([
    row.new('Alerts')
    + row.withPanels([
      alerts,
      alertsRate,
    ]),
    row.new('Notifications')
    + row.withPanels([
      notifications,
      notificationDuration,
    ]),
  ], panelWidth=12, panelHeight=7)
)
},
}
6 changes: 3 additions & 3 deletions doc/alertmanager-mixin/jsonnetfile.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet"
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-latest"
}
},
"version": "master"
"version": "main"
}
],
"legacyImports": false
Expand Down
38 changes: 34 additions & 4 deletions doc/alertmanager-mixin/jsonnetfile.lock.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,42 @@
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet"
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-latest"
}
},
"version": "55cf4ee53ced2b6d3ce96ecce9fb813b4465be98",
"sum": "4/sUV0Kk+o8I+wlYxL9R6EPhL/NiLfYHk+NXlU64RUk="
"version": "1ce5aec95ce32336fe47c8881361847c475b5254",
"sum": "64fMUPI3frXGj4X1FqFd1t7r04w3CUSmXaDcJ23EYbQ="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v11.1.0"
}
},
"version": "1ce5aec95ce32336fe47c8881361847c475b5254",
"sum": "41w7p/rwrNsITqNHMXtGSJAfAyKmnflg6rFhKBduUxM="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/docsonnet.git",
"subdir": "doc-util"
}
},
"version": "6ac6c69685b8c29c54515448eaca583da2d88150",
"sum": "BrAL/k23jq+xy9oA7TWIhUx07dsA/QLm3g7ktCwe//U="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/xtd.git",
"subdir": ""
}
},
"version": "63d430b69a95741061c2f7fc9d84b1a778511d9c",
"sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE="
}
],
"legacyImports": false
Expand Down

0 comments on commit bd70e73

Please sign in to comment.