Skip to content

Commit c2fbe8e

Browse files
authored
fix: pause resume and delete old infra (#156)
* fix: pause resume and delete old infra * fix: multiple
1 parent b8ab16e commit c2fbe8e

File tree

8 files changed

+221
-300
lines changed

8 files changed

+221
-300
lines changed

.gitattributes

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Autodetect text files and force Unix line endings (LF), so Windows does not break them
2+
* text=auto eol=lf
3+
4+
# Force images/fonts to be handled as binaries
5+
*.jpg binary
6+
*.jpeg binary
7+
*.gif binary
8+
*.png binary

.github/scripts/pause.sh

Lines changed: 101 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#!/bin/bash
22
# This script pauses AWS resources (ECS service and RDS Aurora cluster) in the current AWS account.
3+
# Made idempotent - safe to run multiple times, checks resource existence before acting.
34

4-
set -e # Exit on error
5+
set -euo pipefail # Exit on error, undefined variables, and pipe failures
56

67
# Error handler function
78
function error_handler() {
@@ -31,8 +32,12 @@ function validate_args() {
3132
# Check if Aurora DB cluster exists and get its status
3233
function check_aurora_cluster() {
3334
local cluster_id="${STACK_PREFIX}-aurora-${ENVIRONMENT}"
34-
local status=$(aws rds describe-db-clusters --db-cluster-identifier "$cluster_id" \
35-
--query 'DBClusters[0].Status' --output text 2>/dev/null || echo "false")
35+
36+
# Fall back to "not-found" if the describe call fails (e.g. the cluster does not exist) instead of aborting under set -e
37+
local status
38+
status=$(aws rds describe-db-clusters --db-cluster-identifier "$cluster_id" \
39+
--query 'DBClusters[0].Status' --output text 2>/dev/null || echo "not-found")
40+
3641
echo "$status"
3742
}
3843

@@ -41,23 +46,63 @@ function pause_aurora_cluster() {
4146
local cluster_id="${STACK_PREFIX}-aurora-${ENVIRONMENT}"
4247
local status=$1
4348

44-
if [ "$status" = "false" ]; then
45-
echo "Skipping Aurora pause operation: DB cluster does not exist"
46-
return
49+
echo "Aurora cluster status: ${status}"
50+
51+
if [ "$status" = "not-found" ]; then
52+
echo "Aurora cluster does not exist - skipping pause operation"
53+
return 0
4754
elif [ "$status" = "available" ]; then
4855
echo "Pausing Aurora cluster: $cluster_id"
49-
aws rds stop-db-cluster --db-cluster-identifier "$cluster_id" --no-cli-pager --output json
56+
57+
# Use if/then structure for better error handling
58+
if ! aws rds stop-db-cluster --db-cluster-identifier "$cluster_id" --no-cli-pager --output json; then
59+
echo "Failed to pause Aurora cluster: $cluster_id"
60+
return 1
61+
else
62+
echo "Aurora cluster pause initiated successfully"
63+
fi
64+
elif [ "$status" = "stopped" ]; then
65+
echo "Aurora cluster is already stopped - no action needed"
66+
elif [ "$status" = "stopping" ]; then
67+
echo "Aurora cluster is already stopping - no action needed"
5068
else
51-
echo "DB cluster is not in an available state. Current state: $status"
69+
echo "Aurora cluster is in state: $status - no action taken"
5270
fi
71+
72+
return 0
5373
}
5474

5575
# Check if ECS cluster exists
5676
function check_ecs_cluster() {
5777
local cluster_name="${STACK_PREFIX}-node-api-${ENVIRONMENT}"
58-
local status=$(aws ecs describe-clusters --clusters "$cluster_name" \
59-
--query 'clusters[0].status' --output text 2>/dev/null || echo "INACTIVE")
60-
echo "$status"
78+
79+
# Fall back to "not-found" if the describe call fails instead of aborting under set -e
80+
local status
81+
status=$(aws ecs describe-clusters --clusters "$cluster_name" \
82+
--query 'clusters[0].status' --output text 2>/dev/null || echo "not-found")
83+
84+
if [ "$status" = "None" ] || [ -z "$status" ]; then
85+
echo "not-found"
86+
else
87+
echo "$status"
88+
fi
89+
}
90+
91+
# Check if ECS service exists
92+
function check_ecs_service() {
93+
local cluster_name="${STACK_PREFIX}-node-api-${ENVIRONMENT}"
94+
local service_name="${STACK_PREFIX}-node-api-${ENVIRONMENT}"
95+
96+
# Fall back to "not-found" if the describe call fails instead of aborting under set -e
97+
local status
98+
status=$(aws ecs describe-services --cluster "$cluster_name" --services "$service_name" \
99+
--query 'services[0].status' --output text 2>/dev/null || echo "not-found")
100+
101+
if [ "$status" = "None" ] || [ -z "$status" ]; then
102+
echo "not-found"
103+
else
104+
echo "$status"
105+
fi
61106
}
62107

63108
# Pause ECS service by setting min/max capacity to 0
@@ -66,42 +111,73 @@ function pause_ecs_service() {
66111
local service_name="${STACK_PREFIX}-node-api-${ENVIRONMENT}"
67112
local cluster_status=$1
68113

114+
echo "ECS cluster status: ${cluster_status}"
115+
116+
if [ "$cluster_status" = "not-found" ]; then
117+
echo "ECS cluster $cluster_name does not exist - skipping pause operation"
118+
return 0
119+
fi
120+
69121
if [ "$cluster_status" != "ACTIVE" ]; then
70-
echo "Skipping ECS pause operation: Cluster $cluster_name does not exist"
71-
return
122+
echo "ECS cluster $cluster_name is not active (status: ${cluster_status}) - skipping pause operation"
123+
return 0
72124
fi
73125

74-
local service_status=$(aws ecs describe-services --cluster "$cluster_name" --services "$service_name" \
75-
--query 'services[0].status' --output text 2>/dev/null || echo "INACTIVE")
126+
local service_status=$(check_ecs_service)
127+
echo "ECS service status: ${service_status}"
128+
129+
if [ "$service_status" = "not-found" ]; then
130+
echo "ECS service $service_name does not exist in cluster $cluster_name - skipping pause operation"
131+
return 0
132+
fi
76133

77134
if [ "$service_status" = "ACTIVE" ]; then
78135
echo "Scaling down ECS service: $service_name"
79-
aws application-autoscaling register-scalable-target \
136+
137+
# Use if/then structure for better error handling
138+
if ! aws application-autoscaling register-scalable-target \
80139
--service-namespace ecs \
81140
--resource-id "service/$cluster_name/$service_name" \
82141
--scalable-dimension ecs:service:DesiredCount \
83142
--min-capacity 0 \
84143
--max-capacity 0 \
85144
--no-cli-pager \
86-
--output json
145+
--output json; then
146+
echo "Failed to scale down ECS service: $service_name"
147+
return 1
148+
else
149+
echo "ECS service scaled down successfully"
150+
fi
151+
elif [ "$service_status" = "DRAINING" ]; then
152+
echo "ECS service is already draining - no action needed"
87153
else
88-
echo "ECS service $service_name does not exist in cluster $cluster_name"
154+
echo "ECS service is in state: $service_status - no action taken"
89155
fi
156+
157+
return 0
90158
}
91159

92160
# Main execution
93161
validate_args
94162

163+
echo "Starting pause operations for environment: ${ENVIRONMENT} with stack prefix: ${STACK_PREFIX}"
164+
95165
# Check and pause Aurora cluster
96166
aurora_status=$(check_aurora_cluster)
97-
[ "$aurora_status" = "false" ] || echo "Aurora cluster status: $aurora_status"
167+
if pause_aurora_cluster "$aurora_status"; then
168+
echo "Aurora cluster operations completed successfully"
169+
else
170+
echo "Aurora cluster operations failed"
171+
exit 1
172+
fi
98173

99174
# Check and pause ECS service
100175
ecs_status=$(check_ecs_cluster)
101-
[ "$ecs_status" = "INACTIVE" ] || echo "ECS cluster status: $ecs_status"
102-
103-
# Perform pause operations
104-
pause_ecs_service "$ecs_status"
105-
pause_aurora_cluster "$aurora_status"
176+
if pause_ecs_service "$ecs_status"; then
177+
echo "ECS service operations completed successfully"
178+
else
179+
echo "ECS service operations failed"
180+
exit 1
181+
fi
106182

107-
echo "Pause operations completed"
183+
echo "All pause operations completed successfully"

.github/scripts/resume.sh

Lines changed: 100 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#!/bin/bash
22
# This script resumes AWS resources (ECS service and RDS Aurora cluster) in the specified AWS account.
3+
# Made idempotent - safe to run multiple times, checks resource existence before acting.
34

4-
set -e # Exit on error
5+
set -euo pipefail # Exit on error, undefined variables, and pipe failures
56

67
# Error handling function
78
error_handler() {
@@ -11,7 +12,7 @@ error_handler() {
1112
exit 1
1213
}
1314

14-
# Set trap for error handling
15+
# Set trap for error handling (expected "not found" lookups use an explicit || fallback, so they do not trigger it)
1516
trap 'error_handler ${LINENO} ${FUNCNAME[0]}' ERR
1617

1718
# Function to check if required parameters are provided
@@ -31,7 +32,11 @@ check_db_cluster() {
3132
local prefix=$1
3233
local env=$2
3334
local cluster_id="${prefix}-aurora-${env}"
34-
local status=$(aws rds describe-db-clusters --db-cluster-identifier ${cluster_id} --query 'DBClusters[0].Status' --output text 2>/dev/null || echo "not-found")
35+
36+
# Fall back to "not-found" if the describe call fails (e.g. the cluster does not exist) instead of aborting under set -e
37+
local status
38+
status=$(aws rds describe-db-clusters --db-cluster-identifier "${cluster_id}" --query 'DBClusters[0].Status' --output text 2>/dev/null || echo "not-found")
39+
3540
echo "$status"
3641
}
3742

@@ -42,56 +47,109 @@ start_db_cluster() {
4247
local cluster_id="${prefix}-aurora-${env}"
4348

4449
echo "Starting DB cluster ${cluster_id}..."
45-
aws rds start-db-cluster --db-cluster-identifier ${cluster_id} --no-cli-pager --output json
50+
51+
# Start the cluster and capture result
52+
if ! aws rds start-db-cluster --db-cluster-identifier "${cluster_id}" --no-cli-pager --output json; then
53+
echo "Failed to start DB cluster ${cluster_id}"
54+
return 1
55+
fi
4656

4757
echo "Waiting for DB cluster to be available..."
48-
if ! aws rds wait db-cluster-available --db-cluster-identifier ${cluster_id}; then
49-
echo "Timeout waiting for DB cluster to become available"
58+
if ! aws rds wait db-cluster-available --db-cluster-identifier "${cluster_id}"; then
59+
echo "Timeout or error waiting for DB cluster to become available"
5060
return 1
5161
fi
5262

5363
echo "DB cluster is now available"
5464
return 0
5565
}
5666

67+
# Function to check if ECS cluster exists
68+
check_ecs_cluster() {
69+
local cluster=$1
70+
71+
# Fall back to "not-found" if the describe call fails instead of aborting under set -e
72+
local status
73+
status=$(aws ecs describe-clusters --clusters "${cluster}" --query 'clusters[0].status' --output text 2>/dev/null || echo "not-found")
74+
75+
if [ "$status" = "None" ] || [ -z "$status" ]; then
76+
echo "not-found"
77+
else
78+
echo "$status"
79+
fi
80+
}
81+
82+
# Function to check if ECS service exists
83+
check_ecs_service() {
84+
local cluster=$1
85+
local service=$2
86+
87+
# Fall back to "not-found" if the describe call fails instead of aborting under set -e
88+
local status
89+
status=$(aws ecs describe-services --cluster "${cluster}" --services "${service}" --query 'services[0].status' --output text 2>/dev/null || echo "not-found")
90+
91+
if [ "$status" = "None" ] || [ -z "$status" ]; then
92+
echo "not-found"
93+
else
94+
echo "$status"
95+
fi
96+
}
97+
5798
# Function to resume ECS service
5899
resume_ecs_service() {
59100
local prefix=$1
60101
local env=$2
61102
local cluster="${prefix}-node-api-${env}"
62103
local service="${prefix}-node-api-${env}"
63104

64-
echo "Resuming ECS service ${service} on cluster ${cluster}..."
65-
# Check if the ECS cluster exists
66-
if ! aws ecs describe-clusters --clusters "${cluster}" --query 'clusters[0]' --output text &>/dev/null; then
105+
echo "Checking ECS cluster ${cluster}..."
106+
local cluster_status=$(check_ecs_cluster "${cluster}")
107+
108+
if [ "$cluster_status" = "not-found" ]; then
67109
echo "ECS cluster ${cluster} does not exist. Skipping service resume."
68110
return 0
69111
fi
112+
113+
if [ "$cluster_status" != "ACTIVE" ]; then
114+
echo "ECS cluster ${cluster} is not active (status: ${cluster_status}). Skipping service resume."
115+
return 0
116+
fi
70117

71-
# Check if the ECS service exists
72-
if ! aws ecs describe-services --cluster "${cluster}" --services "${service}" --query 'services[0]' --output text &>/dev/null; then
118+
echo "Checking ECS service ${service}..."
119+
local service_status=$(check_ecs_service "${cluster}" "${service}")
120+
121+
if [ "$service_status" = "not-found" ]; then
73122
echo "ECS service ${service} does not exist in cluster ${cluster}. Skipping service resume."
74123
return 0
75124
fi
76-
# Update scaling policy
77-
aws application-autoscaling register-scalable-target \
125+
126+
echo "Resuming ECS service ${service} on cluster ${cluster}..."
127+
128+
# Update scaling policy - a failure here is only logged as a warning and does not abort the resume
129+
if ! aws application-autoscaling register-scalable-target \
78130
--service-namespace ecs \
79-
--resource-id service/${cluster}/${service} \
131+
--resource-id "service/${cluster}/${service}" \
80132
--scalable-dimension ecs:service:DesiredCount \
81133
--min-capacity 1 \
82134
--max-capacity 2 \
83135
--no-cli-pager \
84-
--output json
136+
--output json; then
137+
echo "Warning: Failed to update scaling policy for ECS service ${service}"
138+
fi
85139

86140
# Update service desired count
87-
aws ecs update-service \
88-
--cluster ${cluster} \
89-
--service ${service} \
141+
if ! aws ecs update-service \
142+
--cluster "${cluster}" \
143+
--service "${service}" \
90144
--desired-count 1 \
91145
--no-cli-pager \
92-
--output json
146+
--output json; then
147+
echo "Failed to update ECS service ${service} desired count"
148+
return 1
149+
fi
93150

94151
echo "ECS service has been resumed"
152+
return 0
95153
}
96154

97155
# Main function
@@ -103,20 +161,35 @@ main() {
103161

104162
# Check DB cluster status
105163
local db_status=$(check_db_cluster "$prefix" "$env")
164+
echo "DB cluster status: ${db_status}"
106165

107-
if [ "$db_status" == "not-found" ]; then
108-
echo "Skipping resume operation, DB cluster does not exist"
109-
return 0
110-
elif [ "$db_status" == "stopped" ]; then
111-
start_db_cluster "$prefix" "$env" || return 1
166+
if [ "$db_status" = "not-found" ]; then
167+
echo "DB cluster does not exist - skipping DB operations"
168+
elif [ "$db_status" = "stopped" ]; then
169+
echo "DB cluster is stopped - starting it..."
170+
if start_db_cluster "$prefix" "$env"; then
171+
echo "DB cluster started successfully"
172+
else
173+
echo "Failed to start DB cluster"
174+
return 1
175+
fi
176+
elif [ "$db_status" = "available" ]; then
177+
echo "DB cluster is already available - no action needed"
178+
elif [ "$db_status" = "starting" ]; then
179+
echo "DB cluster is already starting - no action needed"
112180
else
113-
echo "DB cluster is not in a stopped state. Current state: $db_status"
181+
echo "DB cluster is in state: $db_status - no action taken"
114182
fi
115183

116184
# Resume ECS service
117-
resume_ecs_service "$prefix" "$env"
185+
if resume_ecs_service "$prefix" "$env"; then
186+
echo "ECS service operations completed successfully"
187+
else
188+
echo "ECS service operations failed"
189+
return 1
190+
fi
118191

119-
echo "Resources have been resumed successfully"
192+
echo "Resources resume operations completed successfully"
120193
}
121194

122195
# Parse and check arguments

0 commit comments

Comments
 (0)