Skip to content

Commit

Permalink
Build the merge-into-bq job with a dedicated GHA workflow (singer-io#5)
Browse files Browse the repository at this point in the history
* Build the merge-into-bq job with a dedicated GHA workflow

* Remove the merge-into-bq build from the Development workflow

* Add label to the merge-into-bq Docker image to cause a rebuild

* Hack GHA workflow to push the merge-into-bq job image tagged with `latest`

* Publish production docker images to the analytics-warehouse-production project

* Change production DWH project and dataset for the masterdata layer
  • Loading branch information
mryorik authored Aug 22, 2022
1 parent 7589369 commit 321c923
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 98 deletions.
96 changes: 0 additions & 96 deletions .github/workflows/development.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -142,99 +142,3 @@ jobs:
- name: 'Upload DAGs'
run: |
gsutil rsync -rd dags gs://${{ needs.create-composer-env.outputs.bucket }}/dags
# Tests the merge-into-bq job and pushes its Docker image.
# Every step after 'Get changed files' is skipped unless something under
# jobs/merge-into-bq/ differs from the master HEAD commit.
merge-into-bq:
  runs-on: [self-hosted, Linux, X64]
  # Group runs by PR head branch when the event carries one, otherwise by ref name.
  concurrency: merge-into-bq-${{ github.event.pull_request.head.ref || github.ref_name }}
  steps:
    - name: 'Checkout'
      uses: actions/checkout@v3
      with:
        # The master ref must be present locally for the show-ref lookup below.
        fetch-depth: 0

    - uses: docker/setup-buildx-action@v2
    - uses: docker/login-action@v1
      with:
        registry: gcr.io
        username: _json_key
        password: ${{ secrets.DATA_COMPOSER_SA_DEV_KEY }}

    - name: 'Get master HEAD commit SHA'
      id: master-sha
      run: |
        # Print the matching ref(s) to the log; this also fails the step when none exists.
        git show-ref master -s
        # show-ref can list more than one ref matching "master"; keep only the
        # first SHA so exactly one "value=" line is written to the output file.
        echo "value=$(git show-ref master -s | head -n 1)" >> "${GITHUB_OUTPUT}"
    - name: 'Get changed files'
      id: changed-files
      uses: tj-actions/changed-files@v24
      with:
        base_sha: ${{ steps.master-sha.outputs.value }}
        files: |
          jobs/merge-into-bq/**/*
    - name: 'Setup Python'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: actions/setup-python@v4
      with:
        # Quoted so the version is always read as a string, never as a float.
        python-version: '3.8'
    - name: 'Get pip cache dir'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      id: pip-cache-dir
      run: |
        echo "value=$(pip cache dir)" >> "${GITHUB_OUTPUT}"
    # NOTE(review): actions/cache@v2 is an old major version — confirm it is
    # still served by GitHub before relying on this cache.
    - name: 'Cache pip'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: actions/cache@v2
      with:
        path: ${{ steps.pip-cache-dir.outputs.value }}
        key: ${{ runner.os }}-poetry-${{ hashFiles('jobs/merge-into-bq/poetry.lock') }}

    - name: 'Setup Poetry'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: Gr1N/setup-poetry@v7

    - name: 'Poetry install'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      working-directory: jobs/merge-into-bq
      run: poetry install

    - name: 'Pytest'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      working-directory: jobs/merge-into-bq
      run: poetry run pytest -vv

    - name: 'Get image tag'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      id: image-tag
      env:
        # Passed through the environment instead of being expanded into the
        # script text, so a branch name is never interpreted as shell code.
        REF_NAME: ${{ github.ref_name }}
      run: |
        # master publishes ":latest"; any other ref is tagged with its own name.
        if [ "${REF_NAME}" = 'master' ]; then
          echo "value=gcr.io/toptal-hub/data-composer/jobs/merge-into-bq:latest" >> "${GITHUB_OUTPUT}"
        else
          echo "value=gcr.io/analytics-warehouse-dev/data-composer/jobs/merge-into-bq:${REF_NAME}" >> "${GITHUB_OUTPUT}"
        fi
    - name: 'Cache Docker layers'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: actions/cache@v2
      with:
        path: /tmp/.buildx-cache-staging
        key: ${{ runner.os }}-buildx-staging-${{ github.ref_name }}
        # Fall back to the master cache when this ref has none yet.
        restore-keys: ${{ runner.os }}-buildx-staging-master

    - name: 'Docker build and push'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: docker/build-push-action@v3
      with:
        context: jobs/merge-into-bq
        build-contexts: pip_cache=${{ steps.pip-cache-dir.outputs.value }}
        push: true
        tags: ${{ steps.image-tag.outputs.value }}
        cache-from: type=local,src=/tmp/.buildx-cache-staging
        # Written to a separate directory; the next step swaps it into place.
        cache-to: type=local,dest=/tmp/.buildx-cache-new-staging

    - name: 'Update docker cache'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      run: |
        # Replace the restored cache with the freshly exported one.
        rm -rf /tmp/.buildx-cache-staging
        mv /tmp/.buildx-cache-new-staging /tmp/.buildx-cache-staging
113 changes: 113 additions & 0 deletions .github/workflows/merge-into-bq.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Tests the merge-into-bq job and pushes its Docker image on every push.
# Every step after 'Get changed files' is skipped unless something under
# jobs/merge-into-bq/ changed relative to the base commit.
name: merge-into-bq
on: [push]

env:
  DWH_PROJECT: analytics-warehouse-dev
  DOCKER_REGISTRY_PROJECT: analytics-warehouse-dev

jobs:
  merge-into-bq:
    runs-on: [self-hosted, Linux, X64]
    # Runs for the same ref share one concurrency group.
    concurrency: merge-into-bq-${{ github.ref_name }}
    steps:
      - name: 'Checkout'
        uses: actions/checkout@v3
        with:
          # The master ref must be present locally for the show-ref lookup below.
          fetch-depth: 0

      - uses: docker/setup-buildx-action@v2
      # Both logins target gcr.io; the ref decides which service-account key is used.
      - name: 'Login to gcr.io/analytics-warehouse-dev'
        if: ${{ github.ref_name != 'master' }}
        uses: docker/login-action@v1
        with:
          registry: gcr.io
          username: _json_key
          password: ${{ secrets.DATA_COMPOSER_SA_DEV_KEY }}
      # NOTE(review): this step is named after toptal-hub, but the master image
      # is tagged gcr.io/analytics-warehouse-production/... in 'Get image tag' —
      # confirm this key may push there.
      - name: 'Login to gcr.io/toptal-hub'
        if: ${{ github.ref_name == 'master' }}
        uses: docker/login-action@v1
        with:
          registry: gcr.io
          username: _json_key
          password: ${{ secrets.DATA_COMPOSER_SA_PROD_KEY }}

      - name: 'Get base commit SHA'
        if: ${{ github.ref_name != 'master' }}
        id: base-sha
        run: |
          # Print the matching ref(s) to the log; this also fails the step when none exists.
          git show-ref master -s
          # show-ref can list more than one ref matching "master"; keep only the
          # first SHA so exactly one "value=" line is written to the output file.
          echo "value=$(git show-ref master -s | head -n 1)" >> "${GITHUB_OUTPUT}"
      - name: 'Get changed files'
        id: changed-files
        uses: tj-actions/changed-files@v24
        with:
          # On master the previous step is skipped, so its output is empty and
          # the pre-push commit (github.before) becomes the base.
          base_sha: ${{ steps.base-sha.outputs.value || github.before }}
          files: |
            jobs/merge-into-bq/**/*
      - name: 'Setup Python'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is always read as a string, never as a float.
          python-version: '3.8'
      - name: 'Get pip cache dir'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        id: pip-cache-dir
        run: |
          echo "value=$(pip cache dir)" >> "${GITHUB_OUTPUT}"
      # NOTE(review): actions/cache@v2 is an old major version — confirm it is
      # still served by GitHub before relying on this cache.
      - name: 'Cache pip'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        uses: actions/cache@v2
        with:
          path: ${{ steps.pip-cache-dir.outputs.value }}
          key: ${{ runner.os }}-poetry-${{ hashFiles('jobs/merge-into-bq/poetry.lock') }}

      - name: 'Setup Poetry'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        uses: Gr1N/setup-poetry@v7

      - name: 'Poetry install'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        working-directory: jobs/merge-into-bq
        run: poetry install

      - name: 'Pytest'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        working-directory: jobs/merge-into-bq
        run: poetry run pytest -vv

      - name: 'Get image tag'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        id: image-tag
        env:
          # Passed through the environment instead of being expanded into the
          # script text, so a branch name is never interpreted as shell code.
          REF_NAME: ${{ github.ref_name }}
        run: |
          # master publishes ":latest"; any other ref is tagged with its own name.
          if [ "${REF_NAME}" = 'master' ]; then
            echo "value=gcr.io/analytics-warehouse-production/data-composer/jobs/merge-into-bq:latest" >> "${GITHUB_OUTPUT}"
          else
            echo "value=gcr.io/analytics-warehouse-dev/data-composer/jobs/merge-into-bq:${REF_NAME}" >> "${GITHUB_OUTPUT}"
          fi
      - name: 'Cache Docker layers'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        uses: actions/cache@v2
        with:
          path: /tmp/.buildx-cache-staging
          key: ${{ runner.os }}-buildx-staging-${{ github.ref_name }}
          # Fall back to the master cache when this ref has none yet.
          restore-keys: ${{ runner.os }}-buildx-staging-master

      - name: 'Docker build and push'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        uses: docker/build-push-action@v3
        with:
          context: jobs/merge-into-bq
          build-contexts: pip_cache=${{ steps.pip-cache-dir.outputs.value }}
          push: true
          tags: ${{ steps.image-tag.outputs.value }}
          cache-from: type=local,src=/tmp/.buildx-cache-staging
          # Written to a separate directory; the next step swaps it into place.
          cache-to: type=local,dest=/tmp/.buildx-cache-new-staging

      - name: 'Update docker cache'
        if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
        run: |
          # Replace the restored cache with the freshly exported one.
          rm -rf /tmp/.buildx-cache-staging
          mv /tmp/.buildx-cache-new-staging /tmp/.buildx-cache-staging
2 changes: 1 addition & 1 deletion .github/workflows/production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
concurrency: production

env:
DWH_PROJECT: toptal.com:api-project-726361118046
DWH_PROJECT: analytics-warehouse-production
DOCKER_REGISTRY_PROJECT: toptal-hub

jobs:
Expand Down
2 changes: 1 addition & 1 deletion dags/masterdata/luigi_task_hist_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
STAGING_BUCKET = 'com-toptal-analytics-staging-airbyte'
DWH_PROJECT = os.environ['DWH_PROJECT']
DOCKER_REGISTRY_PROJECT = os.environ['DOCKER_REGISTRY_PROJECT']
BQ_DATASET = 'analytics_warehouse_prototype'
BQ_DATASET = 'masterdata'
JOB_IMAGE_TAG = os.environ.get('JOB_IMAGE_TAG', 'latest')

with DAG(
Expand Down
2 changes: 2 additions & 0 deletions jobs/merge-into-bq/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
FROM python:3.8

LABEL version="1.0.0"

RUN pip install --upgrade pip && pip install poetry

WORKDIR /app
Expand Down

0 comments on commit 321c923

Please sign in to comment.