DOSCT-100 Add ECR Image Clean Up (#205)

# Task Branch Pull Request ## Description of Changes This PR adds a Python script for cleaning up ECR images. It removes untagged images and tagged images that count as old development images.
nhsd-exeter · May 22, 2023 · 3830a1e · 3830a1e
1 parent 9822381
commit 3830a1e
Show file tree

Hide file tree

Showing 8 changed files with 112 additions and 4 deletions.
diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml
@@ -170,10 +170,10 @@ jobs:
         with:
           projectBaseDir: .
           args: >
-            -Dsonar.sources=application,deployment,infrastructure,build/docker
+            -Dsonar.sources=application,deployment,infrastructure,build/docker,scripts
             -Dsonar.organization=nhsd-exeter
             -Dsonar.projectKey=uec-dos-ct
-            -Dsonar.coverage.exclusions=**/__test__/**,deployment,infrastructure,application/ui/src/index.tsx,application/ui/jest.config.js,application/ui/craco.config.js,application/ui/src/reportWebVitals.ts,application/ui/src/setupTests.ts,application/conftest.py,application/*/tests/**.py
+            -Dsonar.coverage.exclusions=**/__test__/**,deployment,infrastructure,scripts/**.py,application/ui/src/index.tsx,application/ui/jest.config.js,application/ui/craco.config.js,application/ui/src/reportWebVitals.ts,application/ui/src/setupTests.ts,application/conftest.py,application/*/tests/**.py
             -Dsonar.javascript.lcov.reportPaths=application/ui/coverage/lcov.info
             -Dsonar.python.coverage.reportPaths=coverage.xml
             -Dsonar.python.version=3.10

diff --git a/Makefile b/Makefile
@@ -295,7 +295,7 @@ check-for-environment: # Check if environment exists - mandatory: ENVIRONMENT
 	fi
 
 # ==============================================================================
-# Environment Clean up
+# Environment & ECR Image Clean up
 
 get-environment-list: # Gets a full list of all DCT environments - mandatory: PROFILE=[name]
 	eval "$$(make aws-assume-role-export-variables)"
@@ -314,6 +314,10 @@ clean-up-environments: # Cleans up all DCT environments - mandatory: PROFILE=[na
 		make undeploy ENVIRONMENT=$$env
 	done
 
+clean-up-ecr-repositories: # Cleans up all ECR repositories - mandatory: PROFILE=[name]
+	python -m pip install -r scripts/clean_up_ecr_repositories/requirements.txt
+	python scripts/clean_up_ecr_repositories/clean_up_ecr_repositories.py
+
 # ==============================================================================
 # Checkov (Code Security Best Practices)
 

diff --git a/build/automation/var/project.mk b/build/automation/var/project.mk
@@ -22,6 +22,9 @@ TF_VAR_aws_vpc_name = $(AWS_VPC_NAME)
 
 CONFIGURATION_BUCKET = $(PROJECT_ID)-configuration-bucket
 TF_VAR_developer_role_name := Developer
+
+ECR_REPOSITORY_PREFIX := $(PROJECT_GROUP_SHORT)/$(PROJECT_NAME_SHORT)
+ECR_REPOSITORIES := search,data,ui
 # ==============================================================================
 # Pipeline
 

diff --git a/...deployment-tools/buildspecs/stand_alone_codebuild_stages/clean_up_resources_buildspec.yml b/...deployment-tools/buildspecs/stand_alone_codebuild_stages/clean_up_resources_buildspec.yml
@@ -17,4 +17,4 @@ phases:
       # Clean up resources from non-prod environments
       - make clean-up-environments PROFILE=$PROFILE
       # Clean up ECR repositories with old images
-      # - make clean-up-ecr-repositories PROFILE=$PROFILE
+      - make clean-up-ecr-repositories PROFILE=$PROFILE
diff --git a/pyproject.toml b/pyproject.toml
@@ -63,6 +63,9 @@ target-version = "py310"
   "SLF001", # Allow `_function` in tests.
   "PLR0913", # Allow many arguments in tests.
 ]
+"scripts/**.py"= [
+  "T201", # Allow print statements in scripts.
+]
 
 [tool.ruff.pydocstyle]
 convention = "google" # Use Google docstring convention.

diff --git a/scripts/clean_up_ecr_repositories/__init__.py b/scripts/clean_up_ecr_repositories/__init__.py
diff --git a/scripts/clean_up_ecr_repositories/clean_up_ecr_repositories.py b/scripts/clean_up_ecr_repositories/clean_up_ecr_repositories.py
@@ -0,0 +1,96 @@
+from datetime import datetime, timedelta
+from os import getenv
+
+from boto3 import client
+from pytz import timezone
+
+# Module-level ECR client, created once at import time and used by every function in this script.
+ecr_client = client("ecr")
+# Prefix joined with each repository name as "<prefix>/<name>"; None when the variable is unset.
+repository_prefix = getenv("ECR_REPOSITORY_PREFIX")
+# Account id passed as registryId on every ECR call; None when the variable is unset.
+mgmt_account_id = getenv("AWS_ACCOUNT_ID_MGMT")
+# Sentinel meaning "no page requested yet": the pagination loops compare against it to decide
+# whether a nextToken should be sent with the request.
+NEXT_TOKEN_DEFAULT_VALUE = "NextToken"  # noqa: S105
+
+
+def delete_ecr_images(repository_name: str, image_digests: list) -> None:
+    """Delete images from ECR repositories."""
+    ecr_client.batch_delete_image(repositoryName=repository_name, registryId=mgmt_account_id, imageIds=image_digests)
+
+
+def delete_untagged_images(repository: str) -> None:
+    """Delete untagged images from ECR repositories."""
+    next_token = NEXT_TOKEN_DEFAULT_VALUE
+    while next_token:
+        if next_token == NEXT_TOKEN_DEFAULT_VALUE:
+            response = ecr_client.describe_images(
+                repositoryName=repository,
+                registryId=mgmt_account_id,
+                maxResults=100,
+                filter={
+                    "tagStatus": "UNTAGGED",
+                },
+            )
+        else:
+            response = ecr_client.describe_images(
+                repositoryName=repository,
+                registryId=mgmt_account_id,
+                maxResults=100,
+                nextToken=next_token,
+                filter={
+                    "tagStatus": "UNTAGGED",
+                },
+            )
+        images = response["imageDetails"]
+        next_token = response.get("nextToken")
+        image_digests = [{"imageDigest": image["imageDigest"]} for image in images]
+
+        if not image_digests:
+            return
+
+        delete_ecr_images(repository, image_digests)
+        print(f"Deleting {len(image_digests)} untagged images")
+
+
+def delete_old_task_images(repository: str) -> None:
+    """Delete old images from ECR repositories."""
+    date = datetime.now(timezone("Europe/London")) - timedelta(days=30)
+    next_token = NEXT_TOKEN_DEFAULT_VALUE
+    while next_token:
+        if next_token == NEXT_TOKEN_DEFAULT_VALUE:
+            response = ecr_client.describe_images(
+                repositoryName=repository,
+                registryId=mgmt_account_id,
+                maxResults=100,
+                filter={
+                    "tagStatus": "TAGGED",
+                },
+            )
+        else:
+            response = ecr_client.describe_images(
+                repositoryName=repository,
+                registryId=mgmt_account_id,
+                maxResults=100,
+                nextToken=next_token,
+                filter={
+                    "tagStatus": "TAGGED",
+                },
+            )
+        images = response["imageDetails"]
+        response.get("nextToken")
+        image_digests = [
+            {"imageDigest": image["imageDigest"]}
+            for image in images
+            if image["imagePushedAt"] < date and "2023" in image["imageTags"][0]
+        ]
+
+        if not image_digests:
+            return
+
+        delete_ecr_images(repository, image_digests)
+        print(f"Deleting {len(image_digests)} old development images")
+
+
+if __name__ == "__main__":
+    for repository in getenv("ECR_REPOSITORIES").split(","):
+        print(f"Cleaning up {repository}")
+        repository_name = f"{repository_prefix}/{repository}"
+        delete_old_task_images(repository=repository_name)
+        delete_untagged_images(repository=repository_name)
diff --git a/scripts/clean_up_ecr_repositories/requirements.txt b/scripts/clean_up_ecr_repositories/requirements.txt
@@ -0,0 +1,2 @@
+boto3
+pytz