forked from singer-io/tap-zoom
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DE-2344] Use KubernetesPodOperator to merge data into BigQuery (sing…
- Loading branch information
Showing
22 changed files
with
1,637 additions
and
230 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,15 +13,16 @@ on: | |
|
||
env: | ||
DWH_PROJECT: analytics-warehouse-dev | ||
DOCKER_REGISTRY_PROJECT: analytics-warehouse-dev | ||
|
||
jobs: | ||
create-composer-env: | ||
runs-on: [ self-hosted, Linux, X64 ] | ||
if: ${{ inputs.create_env }} | ||
concurrency: ${{ github.event.pull_request.head.ref || github.ref_name }} | ||
outputs: | ||
name: ${{ steps.env-name.outputs.lowercase }} | ||
bucket: ${{ steps.env-bucket.outputs.bucket }} | ||
exists: ${{ steps.env-exists.outputs.env-exists }} | ||
steps: | ||
- name: 'Checkout' | ||
uses: actions/checkout@v3 | ||
|
@@ -39,29 +40,60 @@ jobs: | |
|
||
# Sets up the Google Cloud SDK on the runner and requests the `kubectl`
# component through the action's `install_components` input.
# NOTE(review): the `uses:` reference below looks mangled by e-mail obfuscation
# in this capture (presumably `google-github-actions/setup-gcloud@<version>`) —
# confirm the real action name and version against the repository.
- name: 'Set up Cloud SDK' | ||
uses: 'google-github-actions/[email protected]' | ||
with: | ||
install_components: 'kubectl' | ||
|
||
# Counts the Composer environments in us-east1 whose name matches the computed
# environment name and publishes that count as the `env-exists` step output.
# Later steps compare the value against '0' / '1'.
- name: "Check if environment exists"
  id: env-exists
  run: |
    # The `::set-output` workflow command is deprecated; step outputs are
    # written as key=value lines appended to the file named by $GITHUB_OUTPUT.
    echo "env-exists=$(gcloud composer environments list --locations us-east1 | grep ' ${{ steps.env-name.outputs.lowercase }} ' | wc -l | xargs)" >> "$GITHUB_OUTPUT"
# Creates the Composer environment, but only when it does not exist yet
# (`env-exists == '0'`) and the caller requested creation (`inputs.create_env`).
# A step may carry a single `if:` key and the command a single
# `--env-variables` flag; the superseded variants are dropped.
- name: 'Create Composer environment'
  if: ${{ steps.env-exists.outputs.env-exists == '0' && inputs.create_env }}
  run: |
    gcloud composer environments create ${{ steps.env-name.outputs.lowercase }} \
      --location us-east1 \
      --image-version composer-2.0.21-airflow-2.2.5 \
      --environment-size small \
      --max-workers 1 \
      --airflow-configs=scheduler-min_file_process_interval=10,scheduler-dag_dir_list_interval=60,core-dags_are_paused_at_creation=True,secrets-backend=airflow.providers.google.cloud.secrets.secret_manager.CloudSecretManagerBackend \
      --env-variables=DWH_PROJECT=${{ env.DWH_PROJECT }},DOCKER_REGISTRY_PROJECT=${{ env.DOCKER_REGISTRY_PROJECT }},JOB_IMAGE_TAG=${{ github.ref_name }} \
      --network default --subnetwork composer --cluster-secondary-range-name pods --services-secondary-range-name services
# Looks up the GKE cluster that backs the Composer environment and fetches
# credentials for it, so the following kubectl steps can talk to that cluster.
# Runs when the environment already exists or creation was requested.
- name: Get GKE cluster credentials
  if: ${{ steps.env-exists.outputs.env-exists == '1' || inputs.create_env }}
  run: |
    gcloud container clusters get-credentials \
      $(gcloud composer environments describe ${{ steps.env-name.outputs.lowercase }} \
          --location us-east1 --format 'get(config.gkeCluster)') \
      --zone us-east1
# Annotates the `default` Kubernetes service account in `kube-public` with the
# data-composer-github GCP service account, then grants that Kubernetes
# service account the workloadIdentityUser role on the GCP service account.
- name: Enable Workload Identity for kube-public/default service account
  if: ${{ steps.env-exists.outputs.env-exists == '1' || inputs.create_env }}
  run: |
    kubectl annotate serviceaccount default \
      --overwrite --namespace kube-public \
      iam.gke.io/gcp-service-account=data-composer-github@analytics-warehouse-dev.iam.gserviceaccount.com
    gcloud iam service-accounts add-iam-policy-binding data-composer-github@analytics-warehouse-dev.iam.gserviceaccount.com \
      --member "serviceAccount:analytics-warehouse-dev.svc.id.goog[kube-public/default]" \
      --role roles/iam.workloadIdentityUser
# Recreates the `composer-cluster-admin` ClusterRoleBinding, binding the
# cluster-admin ClusterRole to the `default` service account of the namespace
# whose name contains `composer-2-`.
- name: 'Grant cluster-admin role to composer-2-*/default service account'
  if: ${{ steps.env-exists.outputs.env-exists == '1' || inputs.create_env }}
  run: |
    # Best-effort delete: the binding may not exist yet, so a failure is
    # ignored with `|| true` (the previous `| true` only piped the output away).
    kubectl delete clusterrolebinding composer-cluster-admin || true
    # NOTE(review): the namespace lookup relies on grep returning exactly one
    # name from the jsonpath output — confirm its shape on a live cluster.
    kubectl create clusterrolebinding composer-cluster-admin \
      --namespace kube-public --clusterrole cluster-admin \
      --serviceaccount=$(kubectl get namespaces --output=jsonpath='{.items[].metadata.name}' | grep composer-2-):default
# Reads the environment's DAG GCS prefix (`config.dagGcsPrefix`), extracts the
# second-to-last path segment with sed, and publishes it as the `bucket`
# step output.
- name: 'Get Composer environment bucket'
  if: ${{ steps.env-exists.outputs.env-exists == '1' || inputs.create_env }}
  id: env-bucket
  run: |
    # The `::set-output` workflow command is deprecated; step outputs are
    # written as key=value lines appended to the file named by $GITHUB_OUTPUT.
    echo "bucket=$(gcloud composer environments describe ${{ steps.env-name.outputs.lowercase }} --location us-east1 --format="get(config.dagGcsPrefix)" | sed -E 's/^.+\/(.+)\/.*$/\1/')" >> "$GITHUB_OUTPUT"
- name: 'Install Python dependencies' | ||
if: ${{ steps.env-exists.outputs.env-exists == '1' || inputs.create_env }} | ||
run: | | ||
gcloud composer environments update ${{ steps.env-name.outputs.lowercase }} \ | ||
--location us-east1 \ | ||
|
@@ -71,6 +103,7 @@ jobs: | |
runs-on: [ self-hosted, Linux, X64 ] | ||
needs: | ||
- create-composer-env | ||
if: ${{ needs.create-composer-env.outputs.exists == '1' || inputs.create_env }} | ||
steps: | ||
- name: 'Checkout' | ||
uses: actions/checkout@v3 | ||
|
@@ -109,3 +142,99 @@ jobs: | |
# Mirrors the local `dags` directory into the environment bucket's `dags`
# folder: recursive (-r), deleting remote files that no longer exist locally (-d).
- name: Upload DAGs
  run: |
    gsutil rsync -r -d dags gs://${{ needs.create-composer-env.outputs.bucket }}/dags
# Tests, builds and pushes the Docker image for the merge-into-bq job.
# Checkout, Buildx setup, registry login and change detection always run; the
# Python, test and Docker steps run only when files under jobs/merge-into-bq
# were modified relative to master's HEAD commit.
merge-into-bq:
  runs-on: [ self-hosted, Linux, X64 ]
  concurrency: merge-into-bq-${{ github.event.pull_request.head.ref || github.ref_name }}
  steps:
    - name: 'Checkout'
      uses: actions/checkout@v3
      with:
        # Full history; presumably needed so the master ref resolves below.
        fetch-depth: 0

    - uses: docker/setup-buildx-action@v2
    - uses: docker/login-action@v1
      with:
        registry: gcr.io
        username: _json_key
        password: ${{ secrets.DATA_COMPOSER_SA_DEV_KEY }}

    - name: 'Get master HEAD commit SHA'
      id: master-sha
      run: |
        git show-ref master -s
        # show-ref can print one SHA per matching ref (local and
        # remote-tracking). A key=value line in $GITHUB_OUTPUT must be
        # single-line, so keep only the first SHA — the one the deprecated
        # `::set-output` command effectively captured.
        echo "value=$(git show-ref master -s | head -n 1)" >> "$GITHUB_OUTPUT"
    - name: 'Get changed files'
      id: changed-files
      uses: tj-actions/changed-files@v24
      with:
        base_sha: ${{ steps.master-sha.outputs.value }}
        files: |
          jobs/merge-into-bq/**/*
    - name: 'Setup Python'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: actions/setup-python@v4
      with:
        # Quoted so YAML keeps the version as a string instead of a float.
        python-version: '3.8'
    - name: 'Get pip cache dir'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      id: pip-cache-dir
      run: |
        echo "value=$(pip cache dir)" >> "$GITHUB_OUTPUT"
    - name: 'Cache pip'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: actions/cache@v2
      with:
        path: ${{ steps.pip-cache-dir.outputs.value }}
        key: ${{ runner.os }}-poetry-${{ hashFiles('jobs/merge-into-bq/poetry.lock') }}

    - name: 'Setup Poetry'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: Gr1N/setup-poetry@v7

    - name: 'Poetry install'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      working-directory: jobs/merge-into-bq
      run: poetry install

    - name: 'Pytest'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      working-directory: jobs/merge-into-bq
      run: poetry run pytest -vv

    # master publishes `:latest` to the toptal-hub registry; every other ref
    # publishes an image tagged with the ref name to analytics-warehouse-dev.
    - name: 'Get image tag'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      id: image-tag
      env:
        # Passed through the environment rather than interpolated into the
        # script, so a crafted ref name cannot inject shell code.
        REF_NAME: ${{ github.ref_name }}
      run: |
        if [ "$REF_NAME" = 'master' ]; then
          echo "value=gcr.io/toptal-hub/data-composer/jobs/merge-into-bq:latest" >> "$GITHUB_OUTPUT"
        else
          echo "value=gcr.io/analytics-warehouse-dev/data-composer/jobs/merge-into-bq:${REF_NAME}" >> "$GITHUB_OUTPUT"
        fi
    - name: 'Cache Docker layers'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: actions/cache@v2
      with:
        path: /tmp/.buildx-cache-staging
        key: ${{ runner.os }}-buildx-staging-${{ github.ref_name }}
        restore-keys: ${{ runner.os }}-buildx-staging-master

    - name: 'Docker build and push'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      uses: docker/build-push-action@v3
      with:
        context: jobs/merge-into-bq
        build-contexts: pip_cache=${{ steps.pip-cache-dir.outputs.value }}
        push: true
        tags: ${{ steps.image-tag.outputs.value }}
        cache-from: type=local,src=/tmp/.buildx-cache-staging
        # Written to a separate directory and swapped in by the next step.
        cache-to: type=local,dest=/tmp/.buildx-cache-new-staging

    - name: 'Update docker cache'
      if: ${{ steps.changed-files.outputs.any_modified == 'true' }}
      run: |
        rm -rf /tmp/.buildx-cache-staging
        mv /tmp/.buildx-cache-new-staging /tmp/.buildx-cache-staging
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ concurrency: staging | |
|
||
env: | ||
DWH_PROJECT: analytics-warehouse-dev | ||
DOCKER_REGISTRY_PROJECT: analytics-warehouse-dev | ||
|
||
jobs: | ||
update-composer-env: | ||
|
@@ -47,7 +48,7 @@ jobs: | |
--environment-size small \ | ||
--max-workers 1 \ | ||
--airflow-configs=scheduler-min_file_process_interval=10,scheduler-dag_dir_list_interval=60,core-dags_are_paused_at_creation=True,secrets-backend=airflow.providers.google.cloud.secrets.secret_manager.CloudSecretManagerBackend,sentry-sentry_dsn=https://[email protected]/6564231 \ | ||
--env-variables=DWH_PROJECT=${{ env.DWH_PROJECT }} | ||
--env-variables=DWH_PROJECT=${{ env.DWH_PROJECT }},DOCKER_REGISTRY_PROJECT=${{ env.DOCKER_REGISTRY_PROJECT }} | ||
- name: 'Get Composer environment bucket' | ||
id: env-bucket | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -132,3 +132,4 @@ dmypy.json | |
|
||
# Docker | ||
.env.local | ||
credentials.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.