From 321c92376b6d1e87f225d00f7fcd07c901de41c0 Mon Sep 17 00:00:00 2001 From: Iaroslav Frolikov Date: Mon, 22 Aug 2022 13:01:26 +0300 Subject: [PATCH] Build the merge-into-bq job with a dedicated GHA workflow (#5) * Build the merge-into-bq job with a dedicated GHA workflow * Remove the merge-into-bq build from the Development workflow * Add label to the merge-into-bq Docker image to cause a rebuild * Hack GHA workflow to push the merge-into-bq job image tagged with `latest` * Publish production docker images to the analytics-warehouse-production project * Change production DWH project and dataset for the masterdata layer --- .github/workflows/development.yaml | 96 ----------------------- .github/workflows/merge-into-bq.yaml | 113 +++++++++++++++++++++++++++ .github/workflows/production.yaml | 2 +- dags/masterdata/luigi_task_hist_1.py | 2 +- jobs/merge-into-bq/Dockerfile | 2 + 5 files changed, 117 insertions(+), 98 deletions(-) create mode 100644 .github/workflows/merge-into-bq.yaml diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index 2f00804..4e73b78 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -142,99 +142,3 @@ jobs: - name: 'Upload DAGs' run: | gsutil rsync -rd dags gs://${{ needs.create-composer-env.outputs.bucket }}/dags - - merge-into-bq: - runs-on: [ self-hosted, Linux, X64 ] - concurrency: merge-into-bq-${{ github.event.pull_request.head.ref || github.ref_name }} - steps: - - name: 'Checkout' - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 - with: - registry: gcr.io - username: _json_key - password: ${{ secrets.DATA_COMPOSER_SA_DEV_KEY }} - - - name: 'Get master HEAD commit SHA' - id: master-sha - run: | - git show-ref master -s - echo "::set-output name=value::$(git show-ref master -s)" - - - name: 'Get changed files' - id: changed-files - uses: tj-actions/changed-files@v24 - with: - base_sha: ${{ steps.master-sha.outputs.value }} - files: | - jobs/merge-into-bq/**/* - - - name: 'Setup Python' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: 'Get pip cache dir' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - id: pip-cache-dir - run: | - echo "::set-output name=value::$(pip cache dir)" - - name: 'Cache pip' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - uses: actions/cache@v2 - with: - path: ${{ steps.pip-cache-dir.outputs.value }} - key: ${{ runner.os }}-poetry-${{ hashFiles('jobs/merge-into-bq/poetry.lock') }} - - - name: 'Setup Poetry' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - uses: Gr1N/setup-poetry@v7 - - - name: 'Poetry install' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - working-directory: jobs/merge-into-bq - run: poetry install - - - name: 'Pytest' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - working-directory: jobs/merge-into-bq - run: poetry run pytest -vv - - - name: 'Get image tag' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - id: image-tag - run: | - if [ '${{ github.ref_name }}' == 'master' ]; then - echo "::set-output name=value::gcr.io/toptal-hub/data-composer/jobs/merge-into-bq:latest" - else - echo "::set-output name=value::gcr.io/analytics-warehouse-dev/data-composer/jobs/merge-into-bq:${{ github.ref_name }}" - fi - - - name: 'Cache Docker layers' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - uses: actions/cache@v2 - with: - path: /tmp/.buildx-cache-staging - key: ${{ runner.os }}-buildx-staging-${{ github.ref_name }} - restore-keys: ${{ runner.os }}-buildx-staging-master - - - name: 'Docker build and push' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - uses: docker/build-push-action@v3 - with: - context: jobs/merge-into-bq - build-contexts: pip_cache=${{ steps.pip-cache-dir.outputs.value }} - push: true - tags: ${{ steps.image-tag.outputs.value }} - cache-from: type=local,src=/tmp/.buildx-cache-staging - cache-to: type=local,dest=/tmp/.buildx-cache-new-staging - - - name: 'Update docker cache' - if: ${{ steps.changed-files.outputs.any_modified == 'true' }} - run: | - rm -rf /tmp/.buildx-cache-staging - mv /tmp/.buildx-cache-new-staging /tmp/.buildx-cache-staging diff --git a/.github/workflows/merge-into-bq.yaml b/.github/workflows/merge-into-bq.yaml new file mode 100644 index 0000000..03e68f5 --- /dev/null +++ b/.github/workflows/merge-into-bq.yaml @@ -0,0 +1,113 @@ +name: merge-into-bq +on: [push] + +env: + DWH_PROJECT: analytics-warehouse-dev + DOCKER_REGISTRY_PROJECT: analytics-warehouse-dev + +jobs: + merge-into-bq: + runs-on: [ self-hosted, Linux, X64 ] + concurrency: merge-into-bq-${{ github.ref_name }} + steps: + - name: 'Checkout' + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - uses: docker/setup-buildx-action@v2 + - name: 'Login to gcr.io/analytics-warehouse-dev' + if: ${{ github.ref_name != 'master' }} + uses: docker/login-action@v1 + with: + registry: gcr.io + username: _json_key + password: ${{ secrets.DATA_COMPOSER_SA_DEV_KEY }} + - name: 'Login to gcr.io/toptal-hub' + if: ${{ github.ref_name == 'master' }} + uses: docker/login-action@v1 + with: + registry: gcr.io + username: _json_key + password: ${{ secrets.DATA_COMPOSER_SA_PROD_KEY }} + + - name: 'Get base commit SHA' + if: ${{ github.ref_name != 'master' }} + id: base-sha + run: | + git show-ref master -s + echo "::set-output name=value::$(git show-ref master -s)" + + - name: 'Get changed files' + id: changed-files + uses: tj-actions/changed-files@v24 + with: + base_sha: ${{ steps.base-sha.outputs.value || github.before }} + files: | + jobs/merge-into-bq/**/* + + - name: 'Setup Python' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: 'Get pip cache dir' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + id: pip-cache-dir + run: | + echo "::set-output name=value::$(pip cache dir)" + - name: 'Cache pip' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + uses: actions/cache@v2 + with: + path: ${{ steps.pip-cache-dir.outputs.value }} + key: ${{ runner.os }}-poetry-${{ hashFiles('jobs/merge-into-bq/poetry.lock') }} + + - name: 'Setup Poetry' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + uses: Gr1N/setup-poetry@v7 + + - name: 'Poetry install' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + working-directory: jobs/merge-into-bq + run: poetry install + + - name: 'Pytest' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + working-directory: jobs/merge-into-bq + run: poetry run pytest -vv + + - name: 'Get image tag' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + id: image-tag + run: | + if [ '${{ github.ref_name }}' == 'master' ]; then + echo "::set-output name=value::gcr.io/analytics-warehouse-production/data-composer/jobs/merge-into-bq:latest" + else + echo "::set-output name=value::gcr.io/analytics-warehouse-dev/data-composer/jobs/merge-into-bq:${{ github.ref_name }}" + fi + + - name: 'Cache Docker layers' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + uses: actions/cache@v2 + with: + path: /tmp/.buildx-cache-staging + key: ${{ runner.os }}-buildx-staging-${{ github.ref_name }} + restore-keys: ${{ runner.os }}-buildx-staging-master + + - name: 'Docker build and push' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + uses: docker/build-push-action@v3 + with: + context: jobs/merge-into-bq + build-contexts: pip_cache=${{ steps.pip-cache-dir.outputs.value }} + push: true + tags: ${{ steps.image-tag.outputs.value }} + cache-from: type=local,src=/tmp/.buildx-cache-staging + cache-to: type=local,dest=/tmp/.buildx-cache-new-staging + + - name: 'Update docker cache' + if: ${{ steps.changed-files.outputs.any_modified == 'true' }} + run: | + rm -rf /tmp/.buildx-cache-staging + mv /tmp/.buildx-cache-new-staging /tmp/.buildx-cache-staging diff --git a/.github/workflows/production.yaml b/.github/workflows/production.yaml index c58a2e8..caeab05 100644 --- a/.github/workflows/production.yaml +++ b/.github/workflows/production.yaml @@ -7,7 +7,7 @@ on: concurrency: production env: - DWH_PROJECT: toptal.com:api-project-726361118046 + DWH_PROJECT: analytics-warehouse-production DOCKER_REGISTRY_PROJECT: toptal-hub jobs: diff --git a/dags/masterdata/luigi_task_hist_1.py b/dags/masterdata/luigi_task_hist_1.py index 587a29e..2b8cc75 100644 --- a/dags/masterdata/luigi_task_hist_1.py +++ b/dags/masterdata/luigi_task_hist_1.py @@ -8,7 +8,7 @@ STAGING_BUCKET = 'com-toptal-analytics-staging-airbyte' DWH_PROJECT = os.environ['DWH_PROJECT'] DOCKER_REGISTRY_PROJECT = os.environ['DOCKER_REGISTRY_PROJECT'] -BQ_DATASET = 'analytics_warehouse_prototype' +BQ_DATASET = 'masterdata' JOB_IMAGE_TAG = os.environ.get('JOB_IMAGE_TAG', 'latest') with DAG( diff --git a/jobs/merge-into-bq/Dockerfile b/jobs/merge-into-bq/Dockerfile index 361540d..0531a28 100644 --- a/jobs/merge-into-bq/Dockerfile +++ b/jobs/merge-into-bq/Dockerfile @@ -1,5 +1,7 @@ FROM python:3.8 +LABEL version="1.0.0" + RUN pip install --upgrade pip && pip install poetry WORKDIR /app