Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Not for review- Test aws resource cleanup #245

Draft
wants to merge 25 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d64df03
unused aws instace and vpcs cleanup
shivakunv Dec 17, 2024
54aafd9
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
7dc26d8
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
1410d04
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
82291ce
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
0d6f55c
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
ce7a9d0
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
f32956d
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
1704b44
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
31e23a5
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
4d05d00
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
b6dabe2
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
bc1c896
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
385fcb7
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
34988d4
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
9264ab2
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
af705c7
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
edd8729
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
bac82fc
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
69264e6
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
dd0d16c
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
c07f700
unused aws instace and vpcs cleanup
shivakunv Dec 19, 2024
014a6e0
Add script to clean-up unused AWS EC2 Instances and VPCs
shivakunv Dec 20, 2024
aa1d57e
Add script to clean-up unused AWS EC2 Instances and VPCs
shivakunv Dec 20, 2024
d306460
Add script to clean-up unused AWS EC2 Instances and VPCs
shivakunv Dec 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions .github/workflows/awscleanup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Workflow that looks up VPCs whose Name tag starts with "ci" and hands each
# one to scripts/awscleanup.sh for removal.
# NOTE(review): the name says "Daily" but no schedule trigger is defined
# below; the workflow only runs on the pull_request/push events listed.
name: Daily AWS Cleanup Bot

# Triggers: pull requests (opened/synchronize) and pushes that involve the
# test-awsresourcecleanup branch.
on:
pull_request:
types:
- opened
- synchronize
branches:
- test-awsresourcecleanup
push:
branches:
- test-awsresourcecleanup

jobs:
cleanup:
runs-on: linux-amd64-cpu4

steps:
# The cleanup script lives in the repository, so a checkout is needed first.
- name: Checkout repository
uses: actions/checkout@v4

# Credentials come from repository secrets; the region is fixed to us-west-1.
- name: Set up AWS CLI
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-west-1

# Collects the matching VPC ids into one space-separated string and exports
# it to later steps as AWS_VPC_IDS through $GITHUB_ENV.
- name: Identify resources for deletion
id: identify-resources
run: |
# Find vpcs with names ci*
vpcs=$(aws ec2 describe-vpcs \
--filters "Name=tag:Name,Values=ci*" \
--query "Vpcs[].VpcId" \
--output text | tr -d '\r' | tr '\n' ' ')
echo "Found VPCs: $vpcs"
echo "AWS_VPC_IDS=$vpcs" >> $GITHUB_ENV

# Skipped when no VPC was found. GITHUB_TOKEN is passed through the
# environment because the script uses it to call the GitHub API.
- name: Clean up VPCs
if: env.AWS_VPC_IDS != ''
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
for vpcid in $AWS_VPC_IDS; do
scripts/awscleanup.sh $vpcid
done

- name: Post cleanup
run: |
echo "Cleanup completed."
10 changes: 6 additions & 4 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ on:
- opened
- synchronize
branches:
- main
- release-*
- main-no
- release-no
- test-awsresourcecleanup
push:
branches:
- main
- release-*
- main-no
- release-no
- test-awsresourcecleanup
schedule:
- cron: '31 11 * * 4'

Expand Down
13 changes: 7 additions & 6 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
name: End-to-end Tests

on:
workflow_run:
workflows: [Go]
pull_request:
types:
- completed
- opened
- synchronize
branches:
- "pull-request/[0-9]+"
- main
- release-*
- test-awsresourcecleanup
push:
branches:
- test-awsresourcecleanup

jobs:
e2e-test:
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ name: Go
on:
push:
branches:
- main
- release-*
- main-no
- release-no
- test-awsresourcecleanup
pull_request:
branches:
- main
- release-*
- main-no
- release-no
- test-awsresourcecleanup

jobs:
build:
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,16 @@ name: Image
on:
pull_request:
branches:
- 'main'
- 'release-*'
- 'main-no'
- 'release-no'
- test-awsresourcecleanup
push:
tags:
- 'v*.*.*'
branches:
- 'main'
- 'release-*'
- 'main-no'
- 'release-no'
- test-awsresourcecleanup

jobs:
docker:
Expand Down
149 changes: 149 additions & 0 deletions scripts/awscleanup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/bin/bash

# Exactly one positional argument is accepted: the id of the VPC to clean up.
[[ $# -eq 1 ]] || {
    echo " vpcid required for deletion"
    exit 1
}

# Keep the id in an exported variable so child processes inherit it too.
vpcid=$1
export vpcid

# get_tag_value RESOURCE_ID KEY
#
# Prints (on stdout) the value of tag KEY on the EC2 resource RESOURCE_ID,
# using the AWS CLI text output of `describe-tags`.
#
# Arguments:
#   $1  resource id to inspect (a VPC id in this script)
#   $2  tag key to look up
# Output:
#   The tag value. NOTE(review): with `--output text` the AWS CLI is expected
#   to print the literal string "None" when the tag does not exist; confirm.
# Exit:
#   Exits with status 1 when not called with exactly two arguments.
get_tag_value(){
    if [[ $# -ne 2 ]]; then
        # Usage errors go to stderr: callers capture stdout as the tag value,
        # so printing the message there would be mistaken for a real value.
        echo " vpcid and key required to get tag value" >&2
        exit 1
    fi
    local vpc=$1
    local key=$2
    # Query the resource that was passed in, not whatever the global $vpcid
    # happens to hold.
    aws ec2 describe-tags --filters "Name=resource-id,Values=$vpc" "Name=key,Values=$key" \
        --query "Tags[0].Value" --output text
}

# delete_vpc_resources VPC_ID
#
# Tears down everything this script knows how to remove inside VPC_ID and then
# deletes the VPC itself. Steps run in dependency order:
#   1. terminate the instances in the VPC and wait for them to be gone
#   2. move leftover network interfaces to the VPC's default security group,
#      then delete every non-default security group
#   3. delete subnets
#   4. re-point route table associations and delete route tables
#   5. detach and delete internet gateways
#   6. delete the VPC (up to 3 attempts, 30 seconds apart)
#
# Individual AWS failures are reported by the AWS CLI but do not abort the
# function; a VPC that still cannot be deleted after the retries is only
# logged so that the caller can continue with other VPCs.
#
# Arguments:
#   $1  id of the VPC to delete
# Exit:
#   Exits with status 1 when not called with exactly one argument.
delete_vpc_resources() {
    if [[ $# -ne 1 ]]; then
        echo " vpcid required for deletion"
        exit 1
    fi
    local vpcid=$1
    local instances security_groups default_sg enis subnets
    local route_tables associations internet_gateways first_rt attempts

    echo "Start cleanup of resources in VPC: $vpcid"

    # Delete Instance
    instances=$(aws ec2 describe-instances \
        --filters "Name=vpc-id,Values=$vpcid" \
        --query "Reservations[].Instances[].InstanceId" \
        --output text | tr -d '\r' | tr '\n' ' ')
    for instance in $instances; do
        aws ec2 terminate-instances --instance-ids "$instance"
    done
    # terminate-instances only starts the shutdown. Security groups, subnets
    # and the VPC cannot be removed while instances still hold them, so block
    # until termination has finished before touching anything else.
    if [[ -n "${instances//[[:space:]]/}" ]]; then
        # Word splitting of $instances is intentional: one argument per id.
        # shellcheck disable=SC2086
        aws ec2 wait instance-terminated --instance-ids $instances \
            || echo "Gave up waiting for instances in VPC $vpcid to terminate, continuing"
    fi

    # Detach and Delete Security Groups
    security_groups=$(aws ec2 describe-security-groups \
        --filters "Name=vpc-id,Values=$vpcid" \
        --query "SecurityGroups[?GroupName!='default'].GroupId" \
        --output text | tr -d '\r' | tr '\n' ' ')
    # Default security group of THIS VPC. The lookup must be scoped by vpc-id:
    # every VPC has a group named "default", and a network interface can only
    # use groups from its own VPC.
    default_sg=$(aws ec2 describe-security-groups \
        --filters "Name=vpc-id,Values=$vpcid" "Name=group-name,Values=default" \
        --query "SecurityGroups[0].GroupId" \
        --output text | tr -d '\r')
    for sg in $security_groups; do
        enis=$(aws ec2 describe-network-interfaces \
            --filters "Name=group-id,Values=$sg" \
            --query "NetworkInterfaces[].NetworkInterfaceId" \
            --output text | tr -d '\r' | tr '\n' ' ')
        for eni in $enis; do
            # Re-home the interface so the group is no longer in use.
            if [[ -n "$default_sg" && "$default_sg" != "None" ]]; then
                aws ec2 modify-network-interface-attribute \
                    --network-interface-id "$eni" \
                    --groups "$default_sg"
            fi
        done
        aws ec2 delete-security-group --group-id "$sg"
    done

    # Delete Subnets
    subnets=$(aws ec2 describe-subnets \
        --filters "Name=vpc-id,Values=$vpcid" \
        --query "Subnets[].SubnetId" \
        --output text | tr -d '\r' | tr '\n' ' ')
    for subnet in $subnets; do
        aws ec2 delete-subnet --subnet-id "$subnet"
    done

    # Delete Route Tables
    # 1. Make first rt as Main , as we cannot delete vpcs attached with main
    # 2. replace all rt with first rt
    # 3. delete rt
    # 4. Main table(first_rt) will be deleted once vpc deleted
    first_rt=""
    route_tables=$(aws ec2 describe-route-tables \
        --filters "Name=vpc-id,Values=$vpcid" \
        --query "RouteTables[].RouteTableId" \
        --output text | tr -d '\r' | tr '\n' ' ')
    for rt in $route_tables; do
        associations=$(aws ec2 describe-route-tables \
            --route-table-ids "$rt" \
            --query "RouteTables[].Associations[].RouteTableAssociationId" \
            --output text | tr -d '\r' | tr '\n' ' ')
        for assoc_id in $associations; do
            if [ -z "$first_rt" ]; then
                aws ec2 replace-route-table-association --association-id "$assoc_id" --route-table-id "$rt"
                first_rt=$rt
            else
                aws ec2 replace-route-table-association --association-id "$assoc_id" --route-table-id "$first_rt"
            fi
        done
        # Best effort: deleting the main route table is expected to fail (see
        # step 4 above), so the error output is deliberately discarded.
        aws ec2 delete-route-table --route-table-id "$rt" 2>>/dev/null
    done

    # Delete Internet Gateway
    internet_gateways=$(aws ec2 describe-internet-gateways \
        --filters "Name=attachment.vpc-id,Values=$vpcid" \
        --query "InternetGateways[].InternetGatewayId" \
        --output text | tr -d '\r' | tr '\n' ' ')
    for igw in $internet_gateways; do
        aws ec2 detach-internet-gateway --internet-gateway-id "$igw" --vpc-id "$vpcid"
        aws ec2 delete-internet-gateway --internet-gateway-id "$igw"
    done

    # Delete vpc
    # try 3 times with 30 seconds interval
    attempts=0
    echo "All resource Deleted for VPC: $vpcid , now delete vpc"
    while [ "$attempts" -lt 3 ]; do
        if aws ec2 delete-vpc --vpc-id "$vpcid"; then
            echo "Successfully deleted VPC: $vpcid"
            break
        else
            attempts=$((attempts + 1))
            if [ "$attempts" -lt 3 ]; then
                echo "Failed to delete VPC: $vpcid. Retrying in 30 seconds..."
                sleep 30
            fi
        fi
    done
    # attempts only reaches 3 when every try failed (success breaks earlier).
    if [ "$attempts" -eq 3 ]; then
        echo "Failed to delete VPC: $vpcid after 3 attempts. Continue the loop to delete other vpc"
    fi
}

# Decide whether the VPC is safe to delete.
#
# The VPC carries tags naming the GitHub repository and workflow run that
# created it. The VPC is deleted only when the GitHub API positively confirms
# that every job of that run has completed; in every uncertain case (missing
# tags, empty/404/unparseable response) the script exits 0 and leaves the VPC
# alone.
github_repository=$(get_tag_value "$vpcid" "GitHubRepository")
run_id=$(get_tag_value "$vpcid" "GitHubRunId")

# Without both tags the owning run cannot be identified, so there is nothing
# to ask GitHub about. "None" is what the AWS CLI text output prints for a
# missing value.
if [[ -z "$github_repository" || "$github_repository" == "None" ||
      -z "$run_id" || "$run_id" == "None" ]]; then
    echo "VPC $vpcid has no GitHubRepository/GitHubRunId tag, skipping"
    exit 0
fi

# Ask for the largest page the API allows so that runs with many jobs are not
# judged on a truncated list.
# NOTE(review): runs with more than 100 jobs would still need pagination.
response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
    "https://api.github.com/repos/${github_repository}/actions/runs/${run_id}/jobs?per_page=100")
if [[ -z "$response" || "$response" == "null" ]]; then
    exit 0
fi

# 1. make sure .jobs exist in response
#    e.g. { "message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404" }
# 2. check if all jobs completed

if ! echo "$response" | jq -e '.jobs != null' >/dev/null 2>&1; then
    exit 0
fi

is_jobs_not_completed=$(echo "$response" | jq -r '.jobs? // [] |
    map(select(.status != "completed")) |
    length')

# Compare as a string: an arithmetic test would treat an empty result (jq
# failure) as 0 and delete a VPC whose jobs were never actually checked.
if [[ "$is_jobs_not_completed" == "0" ]]; then
    echo "Holodeck e2e Job status is not in running stage , Delete the vpc $vpcid and dependent resources"
    delete_vpc_resources "$vpcid"
fi
2 changes: 1 addition & 1 deletion tests/aws_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ var _ = Describe("AWS", func() {
Expect(err).ToNot(HaveOccurred())

// Set unique name for the environment
opts.cfg.Name = opts.cfg.Name + "-" + common.GenerateUID()
common.SetCfgName(&opts.cfg)
// set cache path
opts.cachePath = LogArtifactDir
// set cache file
Expand Down
21 changes: 20 additions & 1 deletion tests/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@

package common

import "math/rand"
import (
"fmt"
"math/rand"
"os"

"github.com/NVIDIA/holodeck/api/holodeck/v1alpha1"
)

func GenerateUID() string {
const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
Expand All @@ -28,3 +34,16 @@ func GenerateUID() string {

return string(b)
}

// SetCfgName overwrites cfg.Name with a generated name of the form
// "ci<attempt>-<sha>-<uid>", where <attempt> is the value of the
// GITHUB_RUN_ATTEMPT environment variable, <sha> is at most the first eight
// characters of GITHUB_SHA, and <uid> is a fresh value from GenerateUID.
// Unset environment variables contribute empty strings. Any previous value of
// cfg.Name is discarded.
func SetCfgName(cfg *v1alpha1.Environment) {
	const shortSHALen = 8

	commit := os.Getenv("GITHUB_SHA")
	runAttempt := os.Getenv("GITHUB_RUN_ATTEMPT")

	// Abbreviate the commit hash; shorter (or empty) values are used as-is.
	if len(commit) > shortSHALen {
		commit = commit[:shortSHALen]
	}

	// The random suffix keeps names distinct between runs of the same commit.
	cfg.Name = fmt.Sprintf("ci%s-%s-%s", runAttempt, commit, GenerateUID())
}
Loading