Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/golangci-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ jobs:
- name: golangci-lint
uses: golangci/golangci-lint-action@v9
with:
version: v2.1.6
version: v2.9.0
10 changes: 10 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ run:
tests: false
linters:
default: all
enable:
- wsl_v5
disable:
- wsl
- cyclop
- depguard
- err113
Expand All @@ -25,6 +28,13 @@ linters:
- varnamelen
- wrapcheck
- funlen
settings:
wsl_v5:
allow-first-in-block: true
allow-whole-block: true
branch-max-lines: 2
disable:
- err
exclusions:
generated: lax
presets:
Expand Down
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,41 @@ $ check_prometheus alert --name "HostHighCpuLoad" --name "PrometheusTargetMissin
OK - Alerts inactive | total=2 firing=0 pending=0 inactive=2
```

#### Checking alerts via their labels

The `--include-label` and `--exclude-label` options can be used to filter alerts:

```bash
$ check_prometheus alert --include-label severity=warning
OK - 2 Alerts: 0 Firing - 0 Pending - 2 Inactive
\_[OK] [MysqlTooManyConnections] is inactive
\_[OK] [MysqlHighPreparedStatementsUtilization] is inactive
```

```bash
$ check_prometheus alert --include-label namespace=production --exclude-label severity=info
OK - 1 Alerts: 0 Firing - 0 Pending - 1 Inactive
\_[OK] [ApacheDown] is inactive
```

#### Checking watchdog alerts

In Prometheus, a "watchdog" or "dead man's switch" is an alert that is always firing to ensure that the alerting pipeline is working. The `-W, --watchdog` flag can be used to flip/negate the exit state of the plugin for these kinds of alerts:

```bash
$ check_prometheus alert --name Watchdog -W --no-alerts-state 2
[OK] - 1 Alerts: 1 Firing - 0 Pending - 0 Inactive
\_ [OK] [Watchdog] is firing - value: 1.00 - {"alertname":"Watchdog","severity":"none"}
|total=1 firing=1 pending=0 inactive=0
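```

If the watchdog alert is not returned at all, the same check reports the state configured via `--no-alerts-state`:

```bash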
$ check_prometheus alert --name Watchdog -W --no-alerts-state 2
[CRITICAL] - 0 Alerts: 0 Firing - 0 Pending - 0 Inactive
\_ [CRITICAL] No alerts retrieved
|total=0 firing=0 pending=0 inactive=0
```

## License

Copyright (c) 2022 [NETWAYS GmbH](mailto:info@netways.de)
Expand Down
39 changes: 36 additions & 3 deletions cmd/alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ type AlertConfig struct {
ExcludeLabels []string
IncludeLabels []string
ProblemsOnly bool
FlipExitState bool
StateLabelKey string
NoAlertsState string
}
Expand Down Expand Up @@ -99,6 +100,7 @@ inactive = 0`,
if cliAlertConfig.AlertName != nil {
check.ExitRaw(check.Unknown, "No such alert defined", "|", pdlist.String())
}

check.ExitRaw(noAlertsState, "No alerts defined", "|", pdlist.String())
}

Expand Down Expand Up @@ -163,7 +165,13 @@ inactive = 0`,

sc := result.NewPartialResult()

_ = sc.SetState(rl.GetStatus(cliAlertConfig.StateLabelKey))
rlStatus := rl.GetStatus(cliAlertConfig.StateLabelKey)
// If the negate flag is set we negate this state
if cliAlertConfig.FlipExitState {
rlStatus = negateStatus(rlStatus)
}

_ = sc.SetState(rlStatus)
sc.Output = rl.GetOutput()
overall.AddSubcheck(sc)
}
Expand All @@ -185,7 +193,13 @@ inactive = 0`,

sc := result.NewPartialResult()

_ = sc.SetState(rl.GetStatus(cliAlertConfig.StateLabelKey))
rlStatus := rl.GetStatus(cliAlertConfig.StateLabelKey)
// If the negate flag is set we negate this state
if cliAlertConfig.FlipExitState {
rlStatus = negateStatus(rlStatus)
}

_ = sc.SetState(rlStatus)
// Set the alert in the internal Type to generate the output
rl.Alert = alert
sc.Output = rl.GetOutput()
Expand Down Expand Up @@ -257,9 +271,12 @@ func init() {
fs.BoolVarP(&cliAlertConfig.ProblemsOnly, "problems", "P", false,
"Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed")

fs.BoolVarP(&cliAlertConfig.FlipExitState, "watchdog", "W", false,
"Flip the exit state for firing alerts. When this flag is set firing alerts will be OK and inactive alerts will be CRITICAL. This is intended for handling watchdog alerts")

fs.StringVarP(&cliAlertConfig.StateLabelKey, "label-key-state", "S", "",
"Use the given AlertRule label to override the exit state for firing alerts."+
"\nIf this flag is set the plugin looks for warning/critical/ok in the provided label key")
"\nIf this flag is set the plugin looks for the strings 'warning/critical/ok' in the provided label key")
}

// Function to convert state to integer.
Expand Down Expand Up @@ -314,3 +331,19 @@ func matchesLabel(labels model.LabelSet, labelsToMatch []string) bool {

return false
}

// negateStatus inverts a check state for alerts that are expected to fire.
//
// OK becomes Critical, while Warning and Critical both become OK. Unknown,
// as well as any value that is none of these states, results in Unknown.
func negateStatus(state int) int {
	// A healthy (non-firing) result is the problem case when negated.
	if state == check.OK {
		return check.Critical
	}

	// Any problem state counts as healthy when negated.
	if state == check.Warning || state == check.Critical {
		return check.OK
	}

	// Unknown stays Unknown; unrecognized values are treated the same way.
	return check.Unknown
}
9 changes: 9 additions & 0 deletions cmd/alert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,15 @@ exit status 2
args: []string{"run", "../main.go", "alert", "--name", "InactiveAlert"},
expected: "[OK] - 1 Alerts: 0 Firing - 0 Pending - 1 Inactive\n\\_ [OK] [InactiveAlert] is inactive\n|total=1 firing=0 pending=0 inactive=1\n\n",
},
{
name: "alert-watchdog",
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write(loadTestdata(alertTestDataSet2))
})),
args: []string{"run", "../main.go", "alert", "--name", "InactiveAlert", "-W"},
expected: "[CRITICAL] - 1 Alerts: 0 Firing - 0 Pending - 1 Inactive\n\\_ [CRITICAL] [InactiveAlert] is inactive\n|total=1 firing=0 pending=0 inactive=1\n\nexit status 2\n",
},
{
name: "alert-recording-rule",
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down
1 change: 1 addition & 0 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ func (c *Config) NewClient() *client.Client {
// Using a Bearer Token for authentication
if c.Bearer != "" {
var t = config.NewInlineSecret(c.Bearer)

rt = config.NewAuthorizationCredentialsRoundTripper("Bearer", t, rt)
}

Expand Down
2 changes: 2 additions & 0 deletions cmd/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Ready: Checks the readiness of an endpoint, which returns OK if the Prometheus s

// Creating an client and connecting to the API
c := cliConfig.NewClient()

err := c.Connect()
if err != nil {
check.ExitError(err)
Expand Down Expand Up @@ -61,6 +62,7 @@ Ready: Checks the readiness of an endpoint, which returns OK if the Prometheus s
if err != nil {
check.ExitError(err)
}

partialResult := result.NewPartialResult()

_ = partialResult.SetState(rc)
Expand Down
5 changes: 3 additions & 2 deletions cmd/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ Note: Time range values e.G. 'go_memstats_alloc_bytes_total[0s]' only the latest
}

c := cliConfig.NewClient()

err = c.Connect()
if err != nil {
check.ExitError(err)
Expand All @@ -93,6 +94,7 @@ Note: Time range values e.G. 'go_memstats_alloc_bytes_total[0s]' only the latest
if strings.Contains(err.Error(), "unmarshalerDecoder: unexpected value type \"string\"") {
err = errors.New("string value results are not supported")
}

check.ExitError(err)
}

Expand All @@ -112,10 +114,8 @@ Note: Time range values e.G. 'go_memstats_alloc_bytes_total[0s]' only the latest
case model.ValVector:
// Instant vector - a set of time series containing a single sample for each time series, all sharing the same timestamp
vectorVal := result.(model.Vector)

// Set initial capacity to reduce memory allocations
for _, sample := range vectorVal {

numberValue := float64(sample.Value)
partial := goresult.NewPartialResult()

Expand Down Expand Up @@ -185,6 +185,7 @@ Note: Time range values e.G. 'go_memstats_alloc_bytes_total[0s]' only the latest
appendum := fmt.Sprintf("HTTP Warnings: %v", strings.Join(warnings, ", "))
overall.Summary = overall.GetOutput() + appendum
}

check.ExitRaw(overall.GetStatus(), overall.GetOutput())
},
}
Expand Down
4 changes: 3 additions & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ func Execute(version string) {
rootCmd.Version = version
rootCmd.VersionTemplate()

if err := rootCmd.Execute(); err != nil {
err := rootCmd.Execute()

if err != nil {
check.ExitError(err)
}
}
Expand Down
3 changes: 1 addition & 2 deletions internal/alert/alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const (
alertnameLabelKey = "alertname"
)

// Internal representation of Prometheus Rules.
// Rule is the internal representation of a Prometheus rule.
// Alert attribute will be used when iterating over multiple AlertingRules.
type Rule struct {
AlertingRule v1.AlertingRule
Expand Down Expand Up @@ -139,7 +139,6 @@ func (a *Rule) GetOutput() (output string) {
// Add current value to output
value, _ = strconv.ParseFloat(a.Alert.Value, 32)
out.WriteString(fmt.Sprintf(" is %s - value: %.2f", a.AlertingRule.State, value))

// Add labels to the output
l, err := json.Marshal(a.Alert.Labels)

Expand Down
7 changes: 2 additions & 5 deletions internal/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package client
import (
"context"
"fmt"
"maps"
"net/http"
"net/url"
"strings"
Expand Down Expand Up @@ -31,7 +32,6 @@ func (c *Client) Connect() error {
Address: c.URL,
RoundTripper: c.RoundTripper,
})

if err != nil {
return fmt.Errorf("error creating client: %w", err)
}
Expand Down Expand Up @@ -113,10 +113,7 @@ func cloneRequest(r *http.Request) *http.Request {
r2 := new(http.Request)
*r2 = *r
// Deep copy of the Header.
r2.Header = make(http.Header)
for k, s := range r.Header {
r2.Header[k] = s
}
maps.Copy(r.Header, r2.Header)

return r2
}
9 changes: 8 additions & 1 deletion testdata/alertmanager/alert.rules
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
groups:
- name: Test Alerts for check_plugin
rules:

- alert: Watchdog
annotations:
message: |
This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing.
expr: vector(1)
labels:
severity: none
- alert: PrometheusTargetMissing
expr: up == 0
for: 0m
Expand Down