Skip to content

Internal/deploy front end #43

Internal/deploy front end

Internal/deploy front end #43

Workflow file for this run

# Workflow preamble: display name, trigger and environment shared by all jobs.
name: Build and deploy infrastructure

# Push trigger, currently disabled and kept for reference:
# on:
#   push:
#     branches: [ main ]
#     paths:
#       - 'client/**'
#       - 'prediction_ack/**'
#       - 'prediction_worker/**'
#       - 'prediction_getter/**'
#       - 'model_inference/**'
#       - 'model_trainer/**'
#       - 'model_training/**'
#       - 'ratings_exporter/**'
#       - 'model_serving_template.yaml'
#       - 'model_training_template.yaml'

# Active trigger: pull requests whose base branch is main.
on:
  pull_request:
    branches:
      - main

# Workflow-level environment, visible to every job and step below.
env:
  # ECR registry host, built from the account id and region variables.
  DOCKER_REGISTRY_URL: ${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com
  # Registry host plus the repository name for each image.
  DOCKER_INFERENCE_REPO_URL: ${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ vars.ECR_INFERENCE_REPO_NAME }}
  DOCKER_TRAINING_REPO_URL: ${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ vars.ECR_TRAINING_REPO_NAME }}
  # State machine started by the trigger-model-training job.
  MODEL_TRAINING_STEP_FUNCTION_ARN: ${{ vars.MODEL_TRAINING_STEP_FUNCTION }}

jobs:
# Reports through the `changed` output whether model_inference/version was
# modified; upload-model-inference-image is gated on that output.
model-inference-changes:
  permissions:
    contents: read
    pull-requests: read
  runs-on: ubuntu-latest
  outputs:
    changed: ${{ steps.filter.outputs.changed }}
  steps:
    - uses: actions/checkout@v2
      name: Checkout main
    # The filter named `changed` below becomes steps.filter.outputs.changed.
    - id: filter
      uses: dorny/paths-filter@v2
      with:
        filters: |
          changed:
            - 'model_inference/version'
# Reports through the `changed` output whether either training version file
# (training_version or data_version) was modified.
model-training-changes:
  permissions:
    contents: read
    pull-requests: read
  runs-on: ubuntu-latest
  outputs:
    changed: ${{ steps.filter.outputs.changed }}
  steps:
    - uses: actions/checkout@v2
      name: Checkout main
    # The filter named `changed` below becomes steps.filter.outputs.changed.
    - id: filter
      uses: dorny/paths-filter@v2
      with:
        filters: |
          changed:
            - 'model_training/training_version'
            - 'model_training/data_version'
# Reports through the `changed` output whether anything under client/ was
# modified; upload-client is gated on that output.
client-changes:
  permissions:
    contents: read
    pull-requests: read
  runs-on: ubuntu-latest
  outputs:
    changed: ${{ steps.filter.outputs.changed }}
  steps:
    - uses: actions/checkout@v2
      name: Checkout main
    # The filter named `changed` below becomes steps.filter.outputs.changed.
    - id: filter
      uses: dorny/paths-filter@v2
      with:
        filters: |
          changed:
            - 'client/**'
# Builds the image from the model_inference directory and pushes it to the
# inference ECR repository, tagged with the contents of
# model_inference/version. Runs only when that version file changed.
upload-model-inference-image:
  name: Upload model inference image to ECR
  needs: [model-inference-changes]
  if: ${{ needs.model-inference-changes.outputs.changed == 'true' }}
  runs-on: ubuntu-latest
  permissions:
    # id-token lets the credentials action request a token for role assumption.
    id-token: write
    contents: read
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    # Exports the version file's contents as DOCKER_IMAGE_TAG for later steps.
    - name: Set DOCKER_IMAGE_TAG environment variable
      run: |
        echo "DOCKER_IMAGE_TAG=$(cat model_inference/version)" >> "$GITHUB_ENV"
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v2
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
      with:
        version: latest
        driver-opts: 'image=moby/buildkit:v0.10.5'
    # Same major version as the deploy jobs in this workflow.
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    # Values reach the script as quoted shell variables instead of being
    # spliced into its text. DOCKER_REGISTRY_URL comes from the workflow env.
    - name: Login to ECR registry
      env:
        AWS_REGION: ${{ vars.AWS_REGION }}
      run: |
        aws ecr get-login-password --region "$AWS_REGION" \
          | docker login --username AWS --password-stdin "$DOCKER_REGISTRY_URL"
    - name: Build and push container image
      uses: docker/build-push-action@v5
      with:
        context: model_inference
        platforms: linux/amd64
        tags: ${{ env.DOCKER_INFERENCE_REPO_URL }}:${{ env.DOCKER_IMAGE_TAG }}
        push: true
# Builds the image from the model_training directory and pushes it to the
# training ECR repository, tagged with the contents of
# model_training/training_version. Runs only when a training version file
# changed.
upload-model-training-image:
  name: Upload model training image to ECR
  needs: [model-training-changes]
  if: ${{ needs.model-training-changes.outputs.changed == 'true' }}
  runs-on: ubuntu-latest
  permissions:
    # id-token lets the credentials action request a token for role assumption.
    id-token: write
    contents: read
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    # Exports the version file's contents as DOCKER_IMAGE_TAG for later steps.
    - name: Set DOCKER_IMAGE_TAG environment variable
      run: |
        echo "DOCKER_IMAGE_TAG=$(cat model_training/training_version)" >> "$GITHUB_ENV"
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v2
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
      with:
        version: latest
        driver-opts: 'image=moby/buildkit:v0.10.5'
    # Same major version as the deploy jobs in this workflow.
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    # Values reach the script as quoted shell variables instead of being
    # spliced into its text. DOCKER_REGISTRY_URL comes from the workflow env.
    - name: Login to ECR registry
      env:
        AWS_REGION: ${{ vars.AWS_REGION }}
      run: |
        aws ecr get-login-password --region "$AWS_REGION" \
          | docker login --username AWS --password-stdin "$DOCKER_REGISTRY_URL"
    - name: Build and push container image
      uses: docker/build-push-action@v5
      with:
        context: model_training
        platforms: linux/amd64
        tags: ${{ env.DOCKER_TRAINING_REPO_URL }}:${{ env.DOCKER_IMAGE_TAG }}
        push: true
# Builds and deploys the model training stack through the repository's make
# targets.
deploy-model-training:
  name: Build and deploy model training infrastructure
  needs: [model-training-changes, upload-model-training-image]
  # The image upload is skipped when no version file changed, so the implicit
  # success() check cannot be used. Unlike always(), this condition still
  # blocks the deploy when a needed job failed or the run was cancelled.
  if: ${{ !cancelled() && !contains(needs.*.result, 'failure') }}
  runs-on: ubuntu-latest
  permissions:
    # id-token lets the credentials action request a token for role assumption.
    id-token: write
    contents: read
  steps:
    # Same checkout version as the other jobs in this workflow.
    - uses: actions/checkout@v2
    - uses: aws-actions/setup-sam@v2
      with:
        use-installer: true
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    - name: Build model training template
      run: make build-model-training-prod
    - name: Deploy model training
      run: make deploy-model-training-prod-ci
# Starts the model training state machine, waits for the execution to end,
# then waits for the SageMaker training job named after the two version files
# to reach a terminal status. Runs only when a training version file changed.
trigger-model-training:
  name: Trigger data export and model re-training
  needs: [model-training-changes, deploy-model-training]
  if: ${{ needs.model-training-changes.outputs.changed == 'true' }}
  runs-on: ubuntu-latest
  permissions:
    # id-token lets the credentials action request a token for role assumption.
    id-token: write
    contents: read
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    # Same major version as the deploy jobs in this workflow.
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    - name: Set MODEL_TRAINING_VERSION environment variable
      run: |
        echo "MODEL_TRAINING_VERSION=$(cat model_training/training_version)" >> "$GITHUB_ENV"
    - name: Set MODEL_DATA_VERSION environment variable
      run: |
        echo "MODEL_DATA_VERSION=$(cat model_training/data_version)" >> "$GITHUB_ENV"
    - name: Trigger data export and model training workflow
      run: |
        # MODEL_TRAINING_STEP_FUNCTION_ARN comes from the workflow-level env.
        execution_arn=$(aws stepfunctions start-execution \
          --state-machine-arn "$MODEL_TRAINING_STEP_FUNCTION_ARN" \
          --query 'executionArn' --output text)
        echo "Execution started with ARN: $execution_arn"
        # Poll every 10 seconds until the execution reaches a terminal status.
        while true; do
          status=$(aws stepfunctions describe-execution \
            --execution-arn "$execution_arn" --query 'status' --output text)
          case "$status" in
            SUCCEEDED)
              echo "Execution finished with status: $status"
              break
              ;;
            # ABORTED is terminal too; without it the loop would never end.
            FAILED|TIMED_OUT|ABORTED)
              echo "Execution finished with status: $status"
              exit 1
              ;;
          esac
          sleep 10
        done
    - name: Wait for training job completion
      run: |
        # The versions are read as shell variables, not spliced into the
        # script text, so the version files' contents are never parsed as code.
        training_job_name="canihaveatvshowplz-prod-model-${MODEL_TRAINING_VERSION}-data-${MODEL_DATA_VERSION}"
        # NOTE(review): 43200 s is 12 h; if the runner enforces a shorter job
        # time limit, this bound is never reached - confirm.
        timeout=43200
        start_time=$(date +%s)
        while true; do
          # Test the command inside `if` so a failing lookup reaches the
          # message below instead of aborting the script first.
          if ! response=$(aws sagemaker describe-training-job --training-job-name "$training_job_name" 2>/dev/null) \
            || [[ -z $response ]]; then
            echo "Training job $training_job_name not found."
            exit 1
          fi
          status=$(echo "$response" | jq -r '.TrainingJobStatus')
          case "$status" in
            Completed)
              echo "Training job $training_job_name finished with status: $status"
              exit 0
              ;;
            Failed|Stopped)
              echo "Training job $training_job_name finished with status: $status"
              exit 1
              ;;
          esac
          current_time=$(date +%s)
          elapsed_time=$((current_time - start_time))
          if [ "$elapsed_time" -ge "$timeout" ]; then
            echo "Timeout reached. Training job $training_job_name not finished after $timeout seconds."
            exit 1
          fi
          sleep 15
        done
# Builds and deploys the model serving stack through the repository's make
# targets, after every image and training job has finished.
deploy-model-serving:
  name: Build and deploy model serving infrastructure
  needs:
    - deploy-model-training
    - trigger-model-training
    - model-inference-changes
    - upload-model-inference-image
    - model-training-changes
    - upload-model-training-image
  # Several needed jobs are skipped when nothing relevant changed, so the
  # implicit success() check cannot be used. Unlike always(), this condition
  # still blocks the deploy when a needed job failed or the run was cancelled.
  if: ${{ !cancelled() && !contains(needs.*.result, 'failure') }}
  runs-on: ubuntu-latest
  permissions:
    # id-token lets the credentials action request a token for role assumption.
    id-token: write
    contents: read
  steps:
    # Same checkout version as the other jobs in this workflow.
    - uses: actions/checkout@v2
    - uses: aws-actions/setup-sam@v2
      with:
        use-installer: true
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    - name: Build model serving template
      run: make build-model-serving-prod
    # Label corrected: this step runs the serving deploy target.
    - name: Deploy model serving
      run: make deploy-model-serving-prod-ci
# Builds the client with npm, syncs client/dist to the bucket and invalidates
# every path of the CloudFront distribution. Runs only when files under
# client/ changed.
upload-client:
  name: Upload client to S3
  needs: [deploy-model-serving, client-changes]
  # !cancelled() replaces the implicit success() check, so skipped upstream
  # jobs do not skip this one. The client is still published only after the
  # serving deploy succeeded, which always() did not guarantee.
  if: ${{ !cancelled() && needs.deploy-model-serving.result == 'success' && needs.client-changes.outputs.changed == 'true' }}
  runs-on: ubuntu-latest
  permissions:
    # id-token lets the credentials action request a token for role assumption.
    id-token: write
    contents: read
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    - name: Setup Node
      uses: actions/setup-node@v3
      with:
        node-version: '20'
    - name: Install node dependencies
      run: npm ci
      working-directory: client
    - name: Build production artifact
      run: npm run build
      working-directory: client
    # Same major version as the deploy jobs in this workflow.
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    # NOTE(review): BUCKET_ID is used as the sync destination as-is, so it
    # presumably holds a full s3:// URI - confirm.
    - name: Deploy client to S3
      env:
        BUCKET_ID: ${{ vars.BUCKET_ID }}
      run: |
        aws s3 sync dist "$BUCKET_ID"
      working-directory: client
    - name: Invalidate Cloudfront cache
      env:
        CLOUDFRONT_ID: ${{ vars.CLOUDFRONT_ID }}
      run: |
        aws cloudfront create-invalidation --distribution-id "$CLOUDFRONT_ID" --paths "/*"
      working-directory: client