Internal/deploy front end #43
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow that detects changed components, pushes the model images to ECR,
# runs the model training / serving deployments and uploads the web client.
name: Build and deploy infrastructure

# Push-based trigger, currently disabled; the workflow runs on pull requests
# targeting main instead (see the active `on:` block below).
# on:
#   push:
#     branches: [ main ]
#     paths:
#       - 'client/**'
#       - 'prediction_ack/**'
#       - 'prediction_worker/**'
#       - 'prediction_getter/**'
#       - 'model_inference/**'
#       - 'model_trainer/**'
#       - 'model_training/**'
#       - 'ratings_exporter/**'
#       - 'model_serving_template.yaml'
#       - 'model_training_template.yaml'
on:
  pull_request:
    branches:
      - main

# Values shared by every job; all of them are assembled from repository /
# organisation configuration variables (`vars.*`).
env:
  # ECR registry host used for `docker login`.
  DOCKER_REGISTRY_URL: ${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com
  # Registry host plus repository name; the upload jobs append `:<tag>`.
  DOCKER_INFERENCE_REPO_URL: ${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ vars.ECR_INFERENCE_REPO_NAME }}
  DOCKER_TRAINING_REPO_URL: ${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ vars.ECR_TRAINING_REPO_NAME }}
  # State machine started by the trigger-model-training job.
  MODEL_TRAINING_STEP_FUNCTION_ARN: ${{ vars.MODEL_TRAINING_STEP_FUNCTION }}
jobs: | |
# Change detector for the inference image: exposes a `changed` output that
# downstream jobs compare against the string 'true'.
model-inference-changes:
  runs-on: ubuntu-latest
  permissions:
    contents: read
    pull-requests: read
  outputs:
    # Forwarded verbatim from the paths-filter step below.
    changed: ${{ steps.filter.outputs.changed }}
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    - id: filter
      uses: dorny/paths-filter@v2
      with:
        # Single filter named `changed`, matching only the version file.
        filters: |
          changed:
            - 'model_inference/version'
# Change detector for model training: exposes a `changed` output that
# downstream jobs compare against the string 'true'.
model-training-changes:
  runs-on: ubuntu-latest
  permissions:
    contents: read
    pull-requests: read
  outputs:
    # Forwarded verbatim from the paths-filter step below.
    changed: ${{ steps.filter.outputs.changed }}
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    - id: filter
      uses: dorny/paths-filter@v2
      with:
        # Single filter named `changed`; either version file matches it.
        filters: |
          changed:
            - 'model_training/training_version'
            - 'model_training/data_version'
# Change detector for the web client: exposes a `changed` output that the
# upload-client job compares against the string 'true'.
client-changes:
  runs-on: ubuntu-latest
  permissions:
    contents: read
    pull-requests: read
  outputs:
    # Forwarded verbatim from the paths-filter step below.
    changed: ${{ steps.filter.outputs.changed }}
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    - id: filter
      uses: dorny/paths-filter@v2
      with:
        # Single filter named `changed`, matching anything under client/.
        filters: |
          changed:
            - 'client/**'
# Builds the image from the model_inference/ directory and pushes it to the
# inference ECR repository, tagged with the contents of
# model_inference/version. Runs only when the change detector reported 'true'.
upload-model-inference-image:
  name: Upload model inference image to ECR
  needs: [model-inference-changes]
  if: ${{ needs.model-inference-changes.outputs.changed == 'true' }}
  runs-on: ubuntu-latest
  permissions:
    id-token: write  # presumably for OIDC role assumption below — confirm
    contents: read
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    - name: Set DOCKER_IMAGE_TAG environment variable
      # Exported through GITHUB_ENV so the build step can read env.DOCKER_IMAGE_TAG.
      run: echo "DOCKER_IMAGE_TAG=$(cat model_inference/version)" >> "$GITHUB_ENV"
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v2
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
      with:
        version: latest
        driver-opts: 'image=moby/buildkit:v0.10.5'
    - name: Configure AWS credentials
      # Same major version as the deploy jobs in this workflow.
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    - name: Login to ECR registry
      # Values reach the script as quoted environment variables instead of
      # being pasted into the command text. DOCKER_REGISTRY_URL comes from the
      # workflow-level env block.
      env:
        AWS_REGION: ${{ vars.AWS_REGION }}
      run: |
        aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$DOCKER_REGISTRY_URL"
    - name: Build and push container image
      uses: docker/build-push-action@v5
      with:
        context: model_inference
        platforms: linux/amd64
        tags: ${{ env.DOCKER_INFERENCE_REPO_URL }}:${{ env.DOCKER_IMAGE_TAG }}
        push: true
# Builds the image from the model_training/ directory and pushes it to the
# training ECR repository, tagged with the contents of
# model_training/training_version. Runs only when the change detector
# reported 'true'.
# NOTE(review): the detector also fires when only model_training/data_version
# changes, in which case the image is rebuilt and pushed under an unchanged
# tag — confirm that is intended.
upload-model-training-image:
  name: Upload model training image to ECR
  needs: [model-training-changes]
  if: ${{ needs.model-training-changes.outputs.changed == 'true' }}
  runs-on: ubuntu-latest
  permissions:
    id-token: write  # presumably for OIDC role assumption below — confirm
    contents: read
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    - name: Set DOCKER_IMAGE_TAG environment variable
      # Exported through GITHUB_ENV so the build step can read env.DOCKER_IMAGE_TAG.
      run: echo "DOCKER_IMAGE_TAG=$(cat model_training/training_version)" >> "$GITHUB_ENV"
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v2
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
      with:
        version: latest
        driver-opts: 'image=moby/buildkit:v0.10.5'
    - name: Configure AWS credentials
      # Same major version as the deploy jobs in this workflow.
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    - name: Login to ECR registry
      # Values reach the script as quoted environment variables instead of
      # being pasted into the command text. DOCKER_REGISTRY_URL comes from the
      # workflow-level env block.
      env:
        AWS_REGION: ${{ vars.AWS_REGION }}
      run: |
        aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$DOCKER_REGISTRY_URL"
    - name: Build and push container image
      uses: docker/build-push-action@v5
      with:
        context: model_training
        platforms: linux/amd64
        tags: ${{ env.DOCKER_TRAINING_REPO_URL }}:${{ env.DOCKER_IMAGE_TAG }}
        push: true
# Builds and deploys the model training infrastructure through the
# repository's make targets, after the training image job has finished.
deploy-model-training:
  name: Build and deploy model training infrastructure
  # Run when the upstream jobs succeeded or were skipped (no training
  # changes), but not when one of them failed or the run was cancelled.
  # A plain always() would also deploy after a failed image upload.
  if: ${{ !failure() && !cancelled() }}
  needs: [model-training-changes, upload-model-training-image]
  runs-on: ubuntu-latest
  permissions:
    id-token: write  # presumably for OIDC role assumption below — confirm
    contents: read
  steps:
    # Same checkout version as the other jobs in this workflow.
    - uses: actions/checkout@v2
    - uses: aws-actions/setup-sam@v2
      with:
        use-installer: true
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    - name: Build model training template
      run: make build-model-training-prod
    - name: Deploy model training
      run: make deploy-model-training-prod-ci
# Starts the model training state machine, waits for the execution to end,
# then waits for the SageMaker training job named after the training and data
# versions. Runs only when the training change detector reported 'true'.
trigger-model-training:
  name: Trigger data export and model re-training
  needs: [model-training-changes, deploy-model-training]
  if: ${{ needs.model-training-changes.outputs.changed == 'true' }}
  runs-on: ubuntu-latest
  permissions:
    id-token: write  # presumably for OIDC role assumption below — confirm
    contents: read
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    - name: Configure AWS credentials
      # Same major version as the deploy jobs in this workflow.
      # NOTE(review): the polling steps below can run for hours; confirm the
      # assumed-role session lasts long enough (see role-duration-seconds).
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    - name: Set MODEL_TRAINING_VERSION environment variable
      run: echo "MODEL_TRAINING_VERSION=$(cat model_training/training_version)" >> "$GITHUB_ENV"
    - name: Set MODEL_DATA_VERSION environment variable
      run: echo "MODEL_DATA_VERSION=$(cat model_training/data_version)" >> "$GITHUB_ENV"
    - name: Trigger data export and model training workflow
      # MODEL_TRAINING_STEP_FUNCTION_ARN is provided by the workflow-level env.
      run: |
        execution_arn=$(aws stepfunctions start-execution --state-machine-arn "$MODEL_TRAINING_STEP_FUNCTION_ARN" --query 'executionArn' --output text)
        echo "Execution started with ARN: $execution_arn"
        # Poll every 10 seconds until the execution reaches a terminal state.
        while true; do
          status=$(aws stepfunctions describe-execution --execution-arn "$execution_arn" --query 'status' --output text)
          if [[ $status == "SUCCEEDED" ]]; then
            echo "Execution finished with status: $status"
            break
          # ABORTED is terminal as well; without it the loop would never end.
          elif [[ $status == "FAILED" || $status == "TIMED_OUT" || $status == "ABORTED" ]]; then
            echo "Execution finished with status: $status"
            exit 1
          fi
          sleep 10
        done
    - name: Wait for training job completion
      # MODEL_TRAINING_VERSION / MODEL_DATA_VERSION were exported through
      # GITHUB_ENV by the earlier steps of this job.
      run: |
        training_job_name="canihaveatvshowplz-prod-model-${MODEL_TRAINING_VERSION}-data-${MODEL_DATA_VERSION}"
        # 43200 s = 12 h.
        # NOTE(review): GitHub-hosted runners stop a job earlier than that;
        # confirm this limit is actually reachable.
        timeout=43200
        start_time=$(date +%s)
        while true; do
          # Test the command inside `if` so a failing call reports the message
          # below instead of ending the step silently; stderr stays visible.
          if ! response=$(aws sagemaker describe-training-job --training-job-name "$training_job_name"); then
            echo "Training job $training_job_name not found."
            exit 1
          fi
          status=$(echo "$response" | jq -r '.TrainingJobStatus')
          if [[ $status == "Completed" ]]; then
            echo "Training job $training_job_name finished with status: $status"
            exit 0
          fi
          if [[ $status == "Failed" || $status == "Stopped" ]]; then
            echo "Training job $training_job_name finished with status: $status"
            exit 1
          fi
          current_time=$(date +%s)
          elapsed_time=$((current_time - start_time))
          if [ "$elapsed_time" -ge "$timeout" ]; then
            echo "Timeout reached. Training job $training_job_name not finished after $timeout seconds."
            exit 1
          fi
          sleep 15
        done
# Builds and deploys the model serving infrastructure through the
# repository's make targets, once every image / training job has finished.
deploy-model-serving:
  name: Build and deploy model serving infrastructure
  # Run when the upstream jobs succeeded or were skipped (nothing changed),
  # but not when one of them failed or the run was cancelled. A plain
  # always() would also deploy after a failed training run or image upload.
  if: ${{ !failure() && !cancelled() }}
  needs:
    - deploy-model-training
    - trigger-model-training
    - model-inference-changes
    - upload-model-inference-image
    - model-training-changes
    - upload-model-training-image
  runs-on: ubuntu-latest
  permissions:
    id-token: write  # presumably for OIDC role assumption below — confirm
    contents: read
  steps:
    # Same checkout version as the other jobs in this workflow.
    - uses: actions/checkout@v2
    - uses: aws-actions/setup-sam@v2
      with:
        use-installer: true
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    - name: Build model serving template
      run: make build-model-serving-prod
    # Step label corrected: this step runs the serving deployment target.
    - name: Deploy model serving
      run: make deploy-model-serving-prod-ci
# Builds the client with npm, syncs client/dist to the configured bucket and
# invalidates the CloudFront distribution. Runs only when files under client/
# changed.
upload-client:
  name: Upload client to S3
  needs: [deploy-model-serving, client-changes]
  # Tolerate a skipped upstream job, but do not publish the client when an
  # upstream job failed or the run was cancelled (always() would).
  if: ${{ !failure() && !cancelled() && needs.client-changes.outputs.changed == 'true' }}
  runs-on: ubuntu-latest
  permissions:
    id-token: write  # presumably for OIDC role assumption below — confirm
    contents: read
  steps:
    - name: Checkout main
      uses: actions/checkout@v2
    - name: Setup Node
      uses: actions/setup-node@v3
      with:
        node-version: 20
    - name: Install node dependencies
      run: npm ci
      working-directory: client
    - name: Build production artifact
      run: npm run build
      working-directory: client
    - name: Configure AWS credentials
      # Same major version as the deploy jobs in this workflow.
      uses: aws-actions/configure-aws-credentials@v2
      with:
        role-to-assume: ${{ vars.RUNNER_AWS_ROLE_ID }}
        aws-region: ${{ vars.AWS_REGION }}
    - name: Deploy client to S3
      # Runs inside client/, so `dist` is client/dist.
      run: |
        aws s3 sync dist "${{ vars.BUCKET_ID }}"
      working-directory: client
    - name: Invalidate Cloudfront cache
      # "/*" invalidates every cached path of the distribution.
      run: |
        aws cloudfront create-invalidation --distribution-id "${{ vars.CLOUDFRONT_ID }}" --paths "/*"
      working-directory: client