Skip to content

Commit

Permalink
Merge branch 'improvement/BB-580-circuit-breaker-alert' into tmp/octo…
Browse files Browse the repository at this point in the history
…pus/w/8.7/improvement/BB-580-circuit-breaker-alert
  • Loading branch information
bert-e committed Jul 5, 2024
2 parents fd24da4 + 7788e47 commit f9198b1
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 11 deletions.
10 changes: 10 additions & 0 deletions lib/CircuitBreaker.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,24 @@ const circuitBreakerGauge = ZenkoMetrics.createGauge({
labelNames: ['type'],
});

// Counter registered with no labels. In this file it is incremented by
// startCircuitBreakerMetricsExport() once per export tick while the circuit
// breaker's `failedProbes` property is truthy.
const circuitBreakerCounter = ZenkoMetrics.createCounter({
name: 's3_circuit_breaker_errors_count',
help: 'total number of circuit breaker errors',
labelNames: [],
});

/**
 * Start a periodic export of circuit breaker metrics.
 *
 * On every tick the gauge is set to the breaker's current `state` under the
 * given `type` label, and the error counter is incremented when the
 * breaker's `failedProbes` property is truthy.
 *
 * @param {object} cb - circuit breaker instance; its `state` and
 *   `failedProbes` properties are read on each tick
 * @param {string} [cbType] - value of the gauge's `type` label; falls back
 *   to 'generic' when falsy
 * @param {number} [intervalMs] - export period in milliseconds
 * @returns {*} the handle returned by setInterval(), so that callers
 *   (tests, shutdown code) can stop the export with clearInterval()
 */
function startCircuitBreakerMetricsExport(cb, cbType, intervalMs = collectDefaultMetricsIntervalMs) {
    const type = cbType || 'generic';
    // Return the timer handle: previously it was discarded, which made it
    // impossible to ever stop the export once started.
    return setInterval(() => {
        // The counter advances once per tick while probes are failing,
        // not once per individual probe failure.
        if (cb.failedProbes) {
            circuitBreakerCounter.inc();
        }
        circuitBreakerGauge.set({ type }, cb.state);
    }, intervalMs);
}

module.exports = {
circuitBreakerCounter,
circuitBreakerGauge,
startCircuitBreakerMetricsExport,
updateCircuitBreakerConfigForImplicitOutputQueue,
Expand Down
22 changes: 22 additions & 0 deletions monitoring/lifecycle/alerts.test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -611,3 +611,25 @@ tests:
- alertname: KafkaConsumerRebalanceTimeouts
eval_time: 4m
exp_alerts: []

# Unit test for the CircuitBreakerError alert.
# The counter is sampled once per minute: it stays at 0 for the first two
# samples, then steps to 1 and remains flat.
#   - at 0m the counter has not increased yet, so no alert is expected;
#   - at 5m the step from 0 to 1 lies within the 5m window, so the alert fires;
#   - at 10m the counter has been flat for the whole window, so the alert clears.
- name: CircuitBreakerError
interval: 1m
input_series:
- series: s3_circuit_breaker_errors_count{namespace="zenko"}
values: 0 0 1 1 1 1 1 1 1 1
alert_rule_test:
- alertname: CircuitBreakerError
eval_time: 0m
exp_alerts: []
- alertname: CircuitBreakerError
eval_time: 5m
exp_alerts:
- exp_labels:
severity: warning
namespace: zenko
exp_annotations:
summary: "Circuit breaker is unable to query prometheus"
description: "Circuit breaker is unable to query prometheus."
- alertname: CircuitBreakerError
eval_time: 10m
exp_alerts: []
12 changes: 12 additions & 0 deletions monitoring/lifecycle/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,15 @@ groups:
Kafka rebalance has timed out for pod `{{ $labels.pod }}`, which indicates that the consumer
is not working anymore, and should be restarted.
summary: "Kafka consumer has stopped consuming messages"

# Alert group for the circuit breaker.
# CircuitBreakerError is a warning raised whenever the
# s3_circuit_breaker_errors_count counter has increased during the last
# 5 minutes in the target namespace.
- name: Circuit Breaker
rules:
- alert: CircuitBreakerError
Expr: |
increase(s3_circuit_breaker_errors_count{namespace="${namespace}"}[5m]) > 0
Labels:
severity: warning
Annotations:
description: >-
Circuit breaker is unable to query prometheus.
summary: "Circuit breaker is unable to query prometheus"
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "backbeat",
"version": "8.6.42",
"version": "8.6.43",
"description": "Asynchronous queue and job manager",
"main": "index.js",
"scripts": {
Expand Down Expand Up @@ -50,7 +50,7 @@
"async": "^2.3.0",
"aws-sdk": "^2.938.0",
"backo": "^1.1.0",
"breakbeat": "scality/breakbeat#v1.0.2",
"breakbeat": "scality/breakbeat#v1.0.3",
"bucketclient": "scality/bucketclient#8.1.9",
"commander": "^2.11.0",
"eslint-plugin-mocha": "^10.2.0",
Expand Down
26 changes: 21 additions & 5 deletions tests/unit/lib/util/circuitBreaker.spec.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const assert = require('assert');
const {
circuitBreakerCounter,
circuitBreakerGauge,
startCircuitBreakerMetricsExport,
updateCircuitBreakerConfigForImplicitOutputQueue
Expand Down Expand Up @@ -164,13 +165,28 @@ describe('updateCircuitBreakerConfigForImplicitOutputQueue', () => {
});

describe('startCircuitBreakerMetricsExport', () => {
it('should export circuit breaker state', done => {
const cb = { state: 1234 };
it('should export circuit breaker state and not increment counter', done => {
const cb = { state: 1234, failedProbes: false };
startCircuitBreakerMetricsExport(cb, 'test', 10);
setTimeout(async () => {
const { values: [{ value, labels }] } = await circuitBreakerGauge.get();
assert.deepStrictEqual(labels.type, 'test');
assert.deepStrictEqual(value, 1234);
const { values: [{ value: gaugeValue, labels: gaugeLabels }] } = await circuitBreakerGauge.get();
const { values: [{ value: counterValue }] } = await circuitBreakerCounter.get();
assert.deepStrictEqual(gaugeLabels.type, 'test');
assert.deepStrictEqual(gaugeValue, 1234);
assert.deepStrictEqual(counterValue, 0);
done();
}, 20);
});

it('should export circuit breaker state and increment counter', done => {
const cb = { state: 1234, failedProbes: true };
startCircuitBreakerMetricsExport(cb, 'test', 10);
setTimeout(async () => {
const { values: [{ value: gaugeValue, labels: gaugeLabels }] } = await circuitBreakerGauge.get();
const { values: [{ value: counterValue }] } = await circuitBreakerCounter.get();
assert.deepStrictEqual(gaugeLabels.type, 'test');
assert.deepStrictEqual(gaugeValue, 1234);
assert.deepStrictEqual(counterValue, 1);
done();
}, 20);
});
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/lifecycle/CircuitBreakerGroup.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,7 @@ describe('extractBucketProcessorCircuitBreakerConfigs', () => {
transition: {
location: {},
topic: {},
global: [ ],
global: [],
},
expiration: {
location: {},
Expand Down
6 changes: 3 additions & 3 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1452,9 +1452,9 @@ brace-expansion@^1.1.7:
balanced-match "^1.0.0"
concat-map "0.0.1"

breakbeat@scality/breakbeat#v1.0.2:
version "1.0.2"
resolved "https://codeload.github.com/scality/breakbeat/tar.gz/4d3844424e424a3e0174a70f19715c57f2ae9c1c"
breakbeat@scality/breakbeat#v1.0.3:
version "1.0.3"
resolved "https://codeload.github.com/scality/breakbeat/tar.gz/d977da40ab8d92d4d9a9f935a604c368b8de4cf0"
dependencies:
"@hapi/joi" "^17.1.1"
"@types/node" "^18.11.11"
Expand Down

0 comments on commit f9198b1

Please sign in to comment.