From f0691eb42091547dc7530d4159347addfec84189 Mon Sep 17 00:00:00 2001 From: Maha Benzekri Date: Fri, 5 Jul 2024 16:03:50 +0200 Subject: [PATCH 1/7] BB-580:circuitBreakerCounter introduction --- lib/CircuitBreaker.js | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/CircuitBreaker.js b/lib/CircuitBreaker.js index 937fd2450..25a8f0f88 100644 --- a/lib/CircuitBreaker.js +++ b/lib/CircuitBreaker.js @@ -46,14 +46,24 @@ const circuitBreakerGauge = ZenkoMetrics.createGauge({ labelNames: ['type'], }); +const circuitBreakerCounter = ZenkoMetrics.createCounter({ + name: 's3_circuit_breaker_errors_count', + help: 'total number of circuit breaker errors', + labelNames: [], +}); + function startCircuitBreakerMetricsExport(cb, cbType, intervalMs = collectDefaultMetricsIntervalMs) { const type = cbType || 'generic'; setInterval(() => { + if (cb.failedProbes) { + circuitBreakerCounter.inc(); + } circuitBreakerGauge.set({ type }, cb.state); }, intervalMs); } module.exports = { + circuitBreakerCounter, circuitBreakerGauge, startCircuitBreakerMetricsExport, updateCircuitBreakerConfigForImplicitOutputQueue, From e8d9c95c1e4f873390368024a596781e62183a99 Mon Sep 17 00:00:00 2001 From: Maha Benzekri Date: Fri, 5 Jul 2024 16:04:35 +0200 Subject: [PATCH 2/7] BB-580:alert setup --- monitoring/lifecycle/alerts.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/monitoring/lifecycle/alerts.yaml b/monitoring/lifecycle/alerts.yaml index 45be54f32..526ef8a74 100644 --- a/monitoring/lifecycle/alerts.yaml +++ b/monitoring/lifecycle/alerts.yaml @@ -334,3 +334,15 @@ groups: Kafka rebalance has timed out for pod `{{ $labels.pod }}`, which indicates that the consumer is not working anymore, and should be restarted. summary: "Kafka consumer has stopped consuming messages" + +- name: Circuit Breaker + rules: + - alert: CircuitBreakerError + Expr: | + increase(s3_circuit_breaker_errors_count{namespace="${namespace}"}[5m]) > 0 + Labels: + severity: warning + Annotations: + description: >- + Circuit breaker is unable to query prometheus. + summary: "Circuit breaker is unable to query prometheus" From b385c4f76fd3b798fa06f4db83a4c9457b056372 Mon Sep 17 00:00:00 2001 From: Maha Benzekri Date: Fri, 5 Jul 2024 16:05:07 +0200 Subject: [PATCH 3/7] BB-580:alert test --- monitoring/lifecycle/alerts.test.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/monitoring/lifecycle/alerts.test.yaml b/monitoring/lifecycle/alerts.test.yaml index 4ef4e90e8..379cb3dc0 100644 --- a/monitoring/lifecycle/alerts.test.yaml +++ b/monitoring/lifecycle/alerts.test.yaml @@ -611,3 +611,25 @@ tests: - alertname: KafkaConsumerRebalanceTimeouts eval_time: 4m exp_alerts: [] + + - name: CircuitBreakerError + interval: 1m + input_series: + - series: s3_circuit_breaker_errors_count{namespace="zenko"} + values: 0 0 1 1 1 1 1 1 1 1 + alert_rule_test: + - alertname: CircuitBreakerError + eval_time: 0m + exp_alerts: [] + - alertname: CircuitBreakerError + eval_time: 5m + exp_alerts: + - exp_labels: + severity: warning + namespace: zenko + exp_annotations: + summary: "Circuit breaker is unable to query prometheus" + description: "Circuit breaker is unable to query prometheus." + - alertname: CircuitBreakerError + eval_time: 10m + exp_alerts: [] From 33c239d9972ee094402b4a80363d4c3b87c9fa65 Mon Sep 17 00:00:00 2001 From: Maha Benzekri Date: Fri, 5 Jul 2024 16:05:33 +0200 Subject: [PATCH 4/7] BB-580:circuitbreaker test update --- tests/unit/lib/util/circuitBreaker.spec.js | 26 +++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tests/unit/lib/util/circuitBreaker.spec.js b/tests/unit/lib/util/circuitBreaker.spec.js index f3e077d8d..37c43469b 100644 --- a/tests/unit/lib/util/circuitBreaker.spec.js +++ b/tests/unit/lib/util/circuitBreaker.spec.js @@ -1,5 +1,6 @@ const assert = require('assert'); const { + circuitBreakerCounter, circuitBreakerGauge, startCircuitBreakerMetricsExport, updateCircuitBreakerConfigForImplicitOutputQueue @@ -164,13 +165,28 @@ describe('updateCircuitBreakerConfigForImplicitOutputQueue', () => { }); describe('startCircuitBreakerMetricsExport', () => { - it('should export circuit breaker state', done => { - const cb = { state: 1234 }; + it('should export circuit breaker state and not increment counter', done => { + const cb = { state: 1234, failedProbes: false }; startCircuitBreakerMetricsExport(cb, 'test', 10); setTimeout(async () => { - const { values: [{ value, labels }] } = await circuitBreakerGauge.get(); - assert.deepStrictEqual(labels.type, 'test'); - assert.deepStrictEqual(value, 1234); + const { values: [{ value: gaugeValue, labels: gaugeLabels }] } = await circuitBreakerGauge.get(); + const { values: [{ value: counterValue }] } = await circuitBreakerCounter.get(); + assert.deepStrictEqual(gaugeLabels.type, 'test'); + assert.deepStrictEqual(gaugeValue, 1234); + assert.deepStrictEqual(counterValue, 0); + done(); + }, 20); + }); + + it('should export circuit breaker state and increment counter', done => { + const cb = { state: 1234, failedProbes: true }; + startCircuitBreakerMetricsExport(cb, 'test', 10); + setTimeout(async () => { + const { values: [{ value: gaugeValue, labels: gaugeLabels }] } = await circuitBreakerGauge.get(); + const { values: [{ value: counterValue }] } = await circuitBreakerCounter.get(); + assert.deepStrictEqual(gaugeLabels.type, 'test'); + assert.deepStrictEqual(gaugeValue, 1234); + assert.deepStrictEqual(counterValue, 1); done(); }, 20); }); From f71bab52bd525db99cef6a612334e2fa49ca82fd Mon Sep 17 00:00:00 2001 From: Maha Benzekri Date: Fri, 5 Jul 2024 16:05:45 +0200 Subject: [PATCH 5/7] BB-580:lint warn fix --- tests/unit/lifecycle/CircuitBreakerGroup.spec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/lifecycle/CircuitBreakerGroup.spec.js b/tests/unit/lifecycle/CircuitBreakerGroup.spec.js index 138c2ef50..56e825d45 100644 --- a/tests/unit/lifecycle/CircuitBreakerGroup.spec.js +++ b/tests/unit/lifecycle/CircuitBreakerGroup.spec.js @@ -967,7 +967,7 @@ describe('extractBucketProcessorCircuitBreakerConfigs', () => { transition: { location: {}, topic: {}, - global: [ ], + global: [], }, expiration: { location: {}, From 715d5891accdc8a2a98e7249e56d1fd3d6cae9e6 Mon Sep 17 00:00:00 2001 From: Maha Benzekri Date: Fri, 5 Jul 2024 16:06:07 +0200 Subject: [PATCH 6/7] BB-580:breakbeat version bump --- package.json | 2 +- yarn.lock | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/package.json b/package.json index c0ad3a2ad..ccef5a89b 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,7 @@ "async": "^2.3.0", "aws-sdk": "^2.938.0", "backo": "^1.1.0", - "breakbeat": "scality/breakbeat#v1.0.2", + "breakbeat": "scality/breakbeat#v1.0.3", "bucketclient": "scality/bucketclient#8.1.9", "commander": "^2.11.0", "fcntl": "github:scality/node-fcntl#0.2.2", diff --git a/yarn.lock b/yarn.lock index 3afe98544..d8a6973e7 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1452,9 +1452,9 @@ brace-expansion@^1.1.7: balanced-match "^1.0.0" concat-map "0.0.1" -breakbeat@scality/breakbeat#v1.0.2: - version "1.0.2" - resolved "https://codeload.github.com/scality/breakbeat/tar.gz/4d3844424e424a3e0174a70f19715c57f2ae9c1c" +breakbeat@scality/breakbeat#v1.0.3: + version "1.0.3" + resolved "https://codeload.github.com/scality/breakbeat/tar.gz/d977da40ab8d92d4d9a9f935a604c368b8de4cf0" dependencies: "@hapi/joi" "^17.1.1" "@types/node" "^18.11.11" From 7788e47f4a59c3148b976fbcb251fc77465e60c7 Mon Sep 17 00:00:00 2001 From: Maha Benzekri Date: Fri, 5 Jul 2024 16:46:36 +0200 Subject: [PATCH 7/7] BB-580:project version bump --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ccef5a89b..0dfc8c6c4 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "backbeat", - "version": "8.6.42", + "version": "8.6.43", "description": "Asynchronous queue and job manager", "main": "index.js", "scripts": {