Skip to content

Commit c08e376

Browse files
committed
(fleet/keycloak) add alerts
1 parent c8cb0b0 commit c08e376

File tree

1 file changed

+120
-0
lines changed

1 file changed

+120
-0
lines changed
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
---
# based on https://github.com/integr8ly/keycloak-operator/blob/master/deploy/template/prometheus-rule.yaml
#
# Alerting rules for Keycloak. The rules cover JVM memory usage, time spent
# in garbage collection, deadlocked threads, failed logins, availability of
# the keycloak StatefulSet, and request latency.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    lsst.io/rule: "true"
  name: keycloak
spec:
  groups:
    - name: keycloak
      rules:
        # Used heap has been above 90% of the maximum heap for 1 minute.
        - alert: KeycloakJavaHeapThresholdExceeded
          annotations:
            message: >-
              {{ printf "%0.0f" $value }}% heap usage of {{ $labels.area }}
              in pod {{ $labels.pod }}, namespace {{ $labels.namespace }}.
          expr: |
            100 * jvm_memory_bytes_used{area="heap",job="keycloak-metrics"}
            / jvm_memory_bytes_max{area="heap",job="keycloak-metrics"}
            > 90
          for: 1m
          labels:
            severity: warning

        # Used non-heap memory has been above 90% of its maximum for 1 minute.
        - alert: KeycloakJavaNonHeapThresholdExceeded
          annotations:
            message: >-
              {{ printf "%0.0f" $value }}% nonheap usage of {{ $labels.area }}
              in pod {{ $labels.pod }}, namespace {{ $labels.namespace }}.
          expr: |
            100 * jvm_memory_bytes_used{area="nonheap",job="keycloak-metrics"}
            / jvm_memory_bytes_max{area="nonheap",job="keycloak-metrics"}
            > 90
          for: 1m
          labels:
            severity: warning

        # GC time of the "PS Scavenge" collector grew by more than 54 s
        # (1 * 60 * 0.9) within a 1-minute window. The message reports the
        # `gc` label, which is the label this expression selects on.
        - alert: KeycloakJavaGCTimePerMinuteScavenge
          annotations:
            message: >-
              Amount of time per minute spent on {{ $labels.gc }} garbage
              collection in pod {{ $labels.pod }}, namespace
              {{ $labels.namespace }} exceeds 90%.
              This could indicate that the available heap memory is insufficient.
          expr: |
            increase(jvm_gc_collection_seconds_sum{gc="PS Scavenge",job="keycloak-metrics"}[1m]) > 1 * 60 * 0.9
          for: 1m
          labels:
            severity: warning

        # Same check as above for the "PS MarkSweep" collector.
        - alert: KeycloakJavaGCTimePerMinuteMarkSweep
          annotations:
            message: >-
              Amount of time per minute spent on {{ $labels.gc }} garbage
              collection in pod {{ $labels.pod }}, namespace
              {{ $labels.namespace }} exceeds 90%.
              This could indicate that the available heap memory is insufficient.
          expr: |
            increase(jvm_gc_collection_seconds_sum{gc="PS MarkSweep",job="keycloak-metrics"}[1m]) > 1 * 60 * 0.9
          for: 1m
          labels:
            severity: warning

        # At least one JVM thread has been deadlocked for 1 minute.
        # The selector uses the `job` label, like every other rule in this
        # group; a misspelled label name would match no series at all.
        - alert: KeycloakJavaDeadlockedThreads
          annotations:
            message: >-
              {{ printf "%0.0f" $value }} thread(s) in deadlock state
              in pod {{ $labels.pod }}, namespace {{ $labels.namespace }}.
          expr: |
            jvm_threads_deadlocked{job="keycloak-metrics"}
            > 0
          for: 1m
          labels:
            severity: warning

        # The per-second failed-login rate over 5 minutes, scaled to a
        # 5-minute count (* 300), has stayed above 50 for 5 minutes.
        - alert: KeycloakLoginFailedThresholdExceeded
          annotations:
            message: >-
              More than 50 failed login attempts for realm {{ $labels.realm }},
              provider {{ $labels.provider }}, namespace {{ $labels.namespace }}
              over the last 5 minutes. (Rate of {{ printf "%0f" $value }})
          expr: >
            rate(keycloak_failed_login_attempts{job="keycloak-metrics"}[5m])
            * 300 > 50
          for: 5m
          labels:
            severity: warning

        # The keycloak StatefulSet has had no ready replica for 5 minutes.
        # kube_statefulset_status_current_revision is not usable here:
        # kube-state-metrics exports it with a constant value of 1 and the
        # revision in a label, so a `!= 1` comparison never matches.
        - alert: KeycloakInstanceNotAvailable
          annotations:
            message: >-
              Keycloak instance in namespace {{ $labels.namespace }} has not
              been available for the last 5 minutes.
          expr: kube_statefulset_status_replicas_ready{namespace="keycloak",statefulset="keycloak"} < 1
          for: 5m
          labels:
            severity: critical

        # Fewer than 90% of requests landed in the le="1000.0" bucket
        # (1 second, per the message) over the last 5 minutes.
        # `namespace` is kept in the aggregation so that the message can
        # reference {{ $labels.namespace }}; `by (job)` alone would drop it.
        - alert: KeycloakAPIRequestDuration90PercThresholdExceeded
          annotations:
            message: >-
              90% of the total requests are not served within 1 second for the
              last 5 minutes for the RH SSO API in the {{ $labels.namespace }}
              namespace
          expr: >
            (sum(rate(keycloak_request_duration_bucket{le="1000.0"}[5m])) by (job, namespace)
            /
            sum(rate(keycloak_request_duration_count[5m])) by (job, namespace)) < 0.90
          for: 5m
          labels:
            severity: warning

        # Fewer than 99.5% of requests landed in the le="10000.0" bucket
        # (10 seconds, per the message) over the last 5 minutes.
        # `namespace` is kept in the aggregation for the same reason as in
        # the 90% rule.
        - alert: KeycloakAPIRequestDuration99PercThresholdExceeded
          annotations:
            message: >-
              99.5% of the total requests are not served within 10 seconds for
              the last 5 minutes for the RH SSO API in the
              {{ $labels.namespace }} namespace
          expr: >
            (sum(rate(keycloak_request_duration_bucket{le="10000.0"}[5m])) by (job, namespace)
            /
            sum(rate(keycloak_request_duration_count[5m])) by (job, namespace)) < 0.995
          for: 5m
          labels:
            severity: warning

0 commit comments

Comments
 (0)