Skip to content

Commit

Permalink
Merge pull request #5013 from jadiunr/add-threshold-annotation-even-w…
Browse files Browse the repository at this point in the history
…hen-ok-7x

Improve the threshold annotation format (for 7.x)
  • Loading branch information
echlebek authored Jul 25, 2023
2 parents 531dfea + a8369a2 commit 13016bb
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 40 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG-7.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ instead, it is represented underneath the "spec" object.
information. This information can be quite lengthy and can reduce the overall
system performance if agent entities grow to be too large.
- API keys can now be created with sensuctl create.
- Threshold annotations are now added even when the status is OK.

### Fixed
- Fixed an issue where multi-expression exclusive "Deny" filters were not
Expand All @@ -55,6 +56,7 @@ entity subscriptions and/or a check named `deregistration`.
- Upgraded Go version to 1.19.5. Old Go versions are not supported.
- The sensuctl api-key grant command now returns additional information.
- Handler errors are now logged at the error level instead of the info level.
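- Changed the format of threshold annotations: annotation keys no longer include
the measure (`min`, `max`, `null-status`), and annotation values now list each
configured threshold with its status alongside the actual metric value.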

### Removed
- Removed sensu-backend upgrade command. May make an appearance again in later versions.
Expand Down
80 changes: 54 additions & 26 deletions agent/check_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ const (
allowListOnDenyStatus = "allow_list_on_deny_status"
allowListOnDenyOutput = "check command denied by the agent allow list"
undocumentedTestCheckCommand = "!sensu_test_check!"

measureMin = "min"
measureMax = "max"
measureNullStatus = "null-status"
)

// handleCheck is the check message handler.
Expand Down Expand Up @@ -370,26 +366,33 @@ func evaluateOutputMetricThresholds(event *corev2.Event) uint32 {
points := event.Metrics.Points
thresholds := event.Check.OutputMetricThresholds

var status uint32 = 0
var overallStatus uint32 = 0
annotationValue := ""
for _, thresholdRule := range thresholds {
ruleMatched := false
for _, metricPoint := range points {
if thresholdRule.MatchesMetricPoint(metricPoint) {
ruleMatched = true
var status uint32 = 0
isExceeded := false
for _, rule := range thresholdRule.Thresholds {
if rule.Min != "" {
min, err := strconv.ParseFloat(rule.Min, 64)
if err != nil {
continue
}
if metricPoint.Value < min {
addThresholdAnnotation(event, thresholdRule, measureMin, rule.Status, metricPoint.Value, rule.Min)
isExceeded = true
if status < rule.Status {
status = rule.Status
annotationValue = getAnnotationValue(thresholdRule, measureMin, metricPoint.Value, rule.Min)
}
if overallStatus < rule.Status {
overallStatus = rule.Status
annotationValue = getAnnotationValue(thresholdRule, metricPoint.Value, isExceeded)
}
continue
} else {
annotationValue = getAnnotationValue(thresholdRule, metricPoint.Value, isExceeded)
}
}
if rule.Max != "" {
Expand All @@ -398,43 +401,49 @@ func evaluateOutputMetricThresholds(event *corev2.Event) uint32 {
continue
}
if metricPoint.Value > max {
addThresholdAnnotation(event, thresholdRule, measureMax, rule.Status, metricPoint.Value, rule.Max)
isExceeded = true
if status < rule.Status {
status = rule.Status
annotationValue = getAnnotationValue(thresholdRule, measureMax, metricPoint.Value, rule.Max)
}
if overallStatus < rule.Status {
overallStatus = rule.Status
annotationValue = getAnnotationValue(thresholdRule, metricPoint.Value, isExceeded)
}
} else {
annotationValue = getAnnotationValue(thresholdRule, metricPoint.Value, isExceeded)
}
}
}
addThresholdAnnotation(event, thresholdRule, status, metricPoint.Value, isExceeded)
}
}
if !ruleMatched {
if thresholdRule.NullStatus > 0 {
addNullStatusThresholdAnnotation(event, thresholdRule, thresholdRule.NullStatus)
if status < thresholdRule.NullStatus {
status = thresholdRule.NullStatus
if overallStatus < thresholdRule.NullStatus {
overallStatus = thresholdRule.NullStatus
annotationValue = getNullStatusAnnotationValue(thresholdRule)
}
}
}
}

if annotationValue != "" {
event.AddAnnotation("sensu.io/notifications/"+corev2.CheckStatusToCaption(status), annotationValue)
event.AddAnnotation("sensu.io/notifications/"+corev2.CheckStatusToCaption(overallStatus), annotationValue)
}

return status
return overallStatus
}

func addThresholdAnnotation(event *corev2.Event, metricThreshold *corev2.MetricThreshold, measure string, status uint32, value float64, threshold string) {
event.AddAnnotation(getAnnotationKey(metricThreshold, measure, status), getAnnotationValue(metricThreshold, measure, value, threshold))
func addThresholdAnnotation(event *corev2.Event, metricThreshold *corev2.MetricThreshold, status uint32, value float64, isExceeded bool) {
event.AddAnnotation(getAnnotationKey(metricThreshold, status), getAnnotationValue(metricThreshold, value, isExceeded))
}

func addNullStatusThresholdAnnotation(event *corev2.Event, metricThreshold *corev2.MetricThreshold, status uint32) {
event.AddAnnotation(getAnnotationKey(metricThreshold, measureNullStatus, status), getNullStatusAnnotationValue(metricThreshold))
event.AddAnnotation(getAnnotationKey(metricThreshold, status), getNullStatusAnnotationValue(metricThreshold))
}

func getAnnotationKey(metricThreshold *corev2.MetricThreshold, measure string, status uint32) string {
func getAnnotationKey(metricThreshold *corev2.MetricThreshold, status uint32) string {
var key strings.Builder

key.WriteString("sensu.io/output_metric_thresholds/")
Expand All @@ -444,14 +453,12 @@ func getAnnotationKey(metricThreshold *corev2.MetricThreshold, measure string, s
key.WriteString(tag.Value)
}
key.WriteString("/")
key.WriteString(measure)
key.WriteString("/")
key.WriteString(corev2.CheckStatusToCaption(status))

return key.String()
}

func getAnnotationValue(metricThreshold *corev2.MetricThreshold, measure string, value float64, threshold string) string {
func getAnnotationValue(metricThreshold *corev2.MetricThreshold, value float64, isExceeded bool) string {
var val strings.Builder
var tagsKeyVal strings.Builder

Expand All @@ -471,13 +478,34 @@ func getAnnotationValue(metricThreshold *corev2.MetricThreshold, measure string,
val.WriteString(tagsKeyVal.String())
val.WriteString(")")
}
val.WriteString(" exceeded the configured threshold (")
val.WriteString(measure)
val.WriteString(": ")
val.WriteString(threshold)
val.WriteString(", actual: ")
if isExceeded {
val.WriteString(" exceeded the configured threshold")
} else {
val.WriteString(" is within the configured threshold")
}

for _, t := range metricThreshold.Thresholds {
hasMin := len(t.Min) > 0
hasMax := len(t.Max) > 0
val.WriteString("; expected ")
if hasMin {
val.WriteString("min: ")
val.WriteString(t.Min)
}
if hasMin && hasMax {
val.WriteString(" - ")
}
if hasMax {
val.WriteString("max: ")
val.WriteString(t.Max)
}
val.WriteString(" (status: ")
val.WriteString(corev2.CheckStatusToCaption(t.Status))
val.WriteString(")")
}

val.WriteString("; actual: ")
val.WriteString(strconv.FormatFloat(value, 'f', -1, 64))
val.WriteString(").")

return val.String()
}
Expand Down
29 changes: 15 additions & 14 deletions agent/check_handler_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -562,14 +562,15 @@ func TestEvaluateOutputMetricThresholds(t *testing.T) {
metric1 := &corev2.MetricPoint{Name: "disk_rate", Value: 99999.0, Timestamp: now, Tags: nil}
metric2 := &corev2.MetricPoint{Name: "network_rate", Value: 100001.0, Timestamp: now, Tags: []*corev2.MetricTag{{Name: "device", Value: "eth0"}}}

statusOKAnnotation := "sensu.io/notifications/ok"
statusWarningAnnotation := "sensu.io/notifications/warning"
statusUnknownAnnotation := "sensu.io/notifications/unknown"
statusCriticalAnnotation := "sensu.io/notifications/critical"
diskCriticalMinAnnotation := "sensu.io/output_metric_thresholds/disk_rate/min/critical"
diskCriticalMaxAnnotation := "sensu.io/output_metric_thresholds/disk_rate/max/critical"
diskWarningMinAnnotation := "sensu.io/output_metric_thresholds/disk_rate/min/warning"
netUnknownMaxAnnotation := "sensu.io/output_metric_thresholds/network_rate/max/unknown"
notDiskWarningNullAnnotation := "sensu.io/output_metric_thresholds/not_a_disk_rate/null-status/warning"
diskOKAnnotation := "sensu.io/output_metric_thresholds/disk_rate/ok"
diskCriticalAnnotation := "sensu.io/output_metric_thresholds/disk_rate/critical"
diskWarningAnnotation := "sensu.io/output_metric_thresholds/disk_rate/warning"
netUnknownAnnotation := "sensu.io/output_metric_thresholds/network_rate/unknown"
notDiskWarningNullAnnotation := "sensu.io/output_metric_thresholds/not_a_disk_rate/warning"

testCases := []struct {
name string
Expand All @@ -585,42 +586,42 @@ func TestEvaluateOutputMetricThresholds(t *testing.T) {
metrics: []*corev2.MetricPoint{metric1},
thresholds: []*corev2.MetricThreshold{{Name: "disk_rate", Thresholds: []*corev2.MetricThresholdRule{{Min: "200000.0", Status: 2}}}},
expectedStatus: 2,
expectedAnnotations: []string{statusCriticalAnnotation, diskCriticalMinAnnotation},
expectedAnnotations: []string{statusCriticalAnnotation, diskCriticalAnnotation},
}, {
name: "maximum rule match",
event: &corev2.Event{Check: &corev2.Check{Status: 0}},
metrics: []*corev2.MetricPoint{metric1},
thresholds: []*corev2.MetricThreshold{{Name: "disk_rate", Thresholds: []*corev2.MetricThresholdRule{{Max: "50000.0", Status: 2}}}},
expectedStatus: 2,
expectedAnnotations: []string{statusCriticalAnnotation, diskCriticalMaxAnnotation},
expectedAnnotations: []string{statusCriticalAnnotation, diskCriticalAnnotation},
}, {
name: "no min rule match",
event: &corev2.Event{Check: &corev2.Check{Status: 0}},
metrics: []*corev2.MetricPoint{metric1},
thresholds: []*corev2.MetricThreshold{{Name: "disk_rate", Thresholds: []*corev2.MetricThresholdRule{{Min: "50000.0", Status: 2}}}},
expectedStatus: 0,
expectedAnnotations: []string{},
expectedAnnotations: []string{statusOKAnnotation, diskOKAnnotation},
}, {
name: "no max rule match",
event: &corev2.Event{Check: &corev2.Check{Status: 0}},
metrics: []*corev2.MetricPoint{metric1},
thresholds: []*corev2.MetricThreshold{{Name: "disk_rate", Thresholds: []*corev2.MetricThresholdRule{{Max: "200000.0", Status: 2}}}},
expectedStatus: 0,
expectedAnnotations: []string{},
expectedAnnotations: []string{statusOKAnnotation, diskOKAnnotation},
}, {
name: "min and max rule match",
event: &corev2.Event{Check: &corev2.Check{Status: 0}},
metrics: []*corev2.MetricPoint{metric1},
thresholds: []*corev2.MetricThreshold{{Name: "disk_rate", Thresholds: []*corev2.MetricThresholdRule{{Min: "200000.0", Status: 1}, {Max: "75000.0", Status: 2}}}},
expectedStatus: 2,
expectedAnnotations: []string{statusCriticalAnnotation, diskWarningMinAnnotation, diskCriticalMaxAnnotation},
expectedAnnotations: []string{statusCriticalAnnotation, diskCriticalAnnotation},
}, {
name: "only one rule match",
event: &corev2.Event{Check: &corev2.Check{Status: 0}},
metrics: []*corev2.MetricPoint{metric1},
thresholds: []*corev2.MetricThreshold{{Name: "disk_rate", Thresholds: []*corev2.MetricThresholdRule{{Min: "200000.0", Status: 1}, {Max: "200000.0", Status: 2}}}},
expectedStatus: 1,
expectedAnnotations: []string{statusWarningAnnotation, diskWarningMinAnnotation},
expectedAnnotations: []string{statusWarningAnnotation, diskWarningAnnotation},
}, {
name: "no filter match - null status",
event: &corev2.Event{Check: &corev2.Check{Status: 0}},
Expand All @@ -634,22 +635,22 @@ func TestEvaluateOutputMetricThresholds(t *testing.T) {
metrics: []*corev2.MetricPoint{metric1, metric2},
thresholds: []*corev2.MetricThreshold{{Name: "disk_rate", NullStatus: 1, Thresholds: []*corev2.MetricThresholdRule{{Max: "200000.0", Status: 2}}}},
expectedStatus: 0,
expectedAnnotations: []string{},
expectedAnnotations: []string{statusOKAnnotation, diskOKAnnotation},
}, {
name: "multi metric and filter and rule match",
event: &corev2.Event{Check: &corev2.Check{Status: 0}},
metrics: []*corev2.MetricPoint{metric1, metric2},
thresholds: []*corev2.MetricThreshold{{Name: "disk_rate", NullStatus: 1, Thresholds: []*corev2.MetricThresholdRule{{Max: "50000.0", Status: 2}}}},
expectedStatus: 2,
expectedAnnotations: []string{statusCriticalAnnotation, diskCriticalMaxAnnotation},
expectedAnnotations: []string{statusCriticalAnnotation, diskCriticalAnnotation},
}, {
name: "multi metric and multi rule match",
event: &corev2.Event{Check: &corev2.Check{Status: 0}},
metrics: []*corev2.MetricPoint{metric1, metric2},
thresholds: []*corev2.MetricThreshold{{Name: "disk_rate", NullStatus: 1, Thresholds: []*corev2.MetricThresholdRule{{Max: "50000.0", Status: 2}}},
{Name: "network_rate", Thresholds: []*corev2.MetricThresholdRule{{Max: "40000", Status: 3}}}},
expectedStatus: 3,
expectedAnnotations: []string{statusUnknownAnnotation, diskCriticalMaxAnnotation, netUnknownMaxAnnotation},
expectedAnnotations: []string{statusUnknownAnnotation, diskCriticalAnnotation, netUnknownAnnotation},
},
}

Expand Down

0 comments on commit 13016bb

Please sign in to comment.