Skip to content

Commit 9aa271a

Browse files
authored
SRE-86: Add FreeableMemory CloudWatch alert for RDS (#8004)
1 parent f5288c1 commit 9aa271a

File tree

1 file changed

+34
-2
lines changed

1 file changed

+34
-2
lines changed

infra/terraform/hash/postgres/alerts.tf

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,8 @@ resource "aws_cloudwatch_metric_alarm" "rds_free_storage_space" {
2929
namespace = "AWS/RDS"
3030
statistic = "Minimum"
3131
period = 300 # 5 minutes
32-
evaluation_periods = 2 # Must be low for 10 minutes total
32+
evaluation_periods = 2 # 10 minutes total
33+
datapoints_to_alarm = 2 # Both datapoints must be low
3334
threshold = 10 * 1024 * 1024 * 1024 # 10 GiB in bytes
3435
comparison_operator = "LessThanThreshold"
3536
treat_missing_data = "breaching"
@@ -58,7 +59,8 @@ resource "aws_cloudwatch_metric_alarm" "rds_cpu_utilization_high" {
5859
namespace = "AWS/RDS"
5960
statistic = "Average"
6061
period = 300 # 5 minutes
61-
evaluation_periods = 2 # Must be high for 10 minutes total
62+
evaluation_periods = 5 # 25 minutes total
63+
datapoints_to_alarm = 3 # 3 of 5 datapoints must be high (grace for spikes)
6264
threshold = 80 # 80%
6365
comparison_operator = "GreaterThanThreshold"
6466
treat_missing_data = "notBreaching"
@@ -76,3 +78,33 @@ resource "aws_cloudwatch_metric_alarm" "rds_cpu_utilization_high" {
7678
Purpose = "Alert when RDS CPU utilization is consistently high"
7779
}
7880
}
81+
82+
# CloudWatch Alarm for RDS freeable memory
83+
resource "aws_cloudwatch_metric_alarm" "rds_freeable_memory_low" {
84+
alarm_name = "${var.prefix}-rds-freeable-memory-low"
85+
alarm_description = "CRITICAL: RDS instance ${aws_db_instance.postgres.identifier} has low freeable memory."
86+
87+
# RDS memory metrics
88+
metric_name = "FreeableMemory"
89+
namespace = "AWS/RDS"
90+
statistic = "Minimum"
91+
period = 300 # 5 minutes
92+
evaluation_periods = 3 # 15 minutes total
93+
datapoints_to_alarm = 2 # 2 of 3 datapoints must be low (moderate grace)
94+
threshold = 256 * 1024 * 1024 # 256 MiB in bytes
95+
comparison_operator = "LessThanThreshold"
96+
treat_missing_data = "breaching"
97+
98+
dimensions = {
99+
DBInstanceIdentifier = aws_db_instance.postgres.identifier
100+
}
101+
102+
alarm_actions = [aws_sns_topic.database_alerts.arn]
103+
ok_actions = [aws_sns_topic.database_alerts.arn]
104+
105+
tags = {
106+
Name = "${var.prefix}-rds-freeable-memory-low-alarm"
107+
Severity = "CRITICAL"
108+
Purpose = "Alert when RDS freeable memory is critically low"
109+
}
110+
}

0 commit comments

Comments
 (0)