Skip to content

Commit 1555c00

Browse files
chose to break out the weekly full refresh into its own DAG so that the original DAG is preserved if it needs to be used
1 parent 862afb8 commit 1555c00

File tree

4 files changed

+158
-1
lines changed

4 files changed

+158
-1
lines changed

airflow/dags/transform_warehouse_full_refresh/METADATA.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
description: "Runs dbt with --full-refresh; use this to refresh/rebuild/backfill incremental models."
2-
schedule_interval: "0 12 * * 0"
2+
schedule_interval: Null
33
tags:
44
- all_gusty_features
55
default_args:
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
# Gusty DAG metadata for the weekly warehouse full-refresh DAG.
description: "Runs dbt with --full-refresh; use this to refresh/rebuild/backfill incremental models."
# Every Sunday at 12:00 UTC.
schedule_interval: "0 12 * * 0"
tags:
  - all_gusty_features
default_args:
  owner: airflow
  # Canonical lowercase booleans (yamllint `truthy`); PyYAML parses
  # these identically to the previous "False"/"True" spellings.
  depends_on_past: false
  start_date: !days_ago 1  # gusty custom tag
  # NOTE(review): no recipients visible in this view (value was blank) —
  # confirm whether addresses were intended here before alerts matter.
  email: null
  email_on_failure: true
  email_on_retry: false
  retries: 0
  retry_delay: !timedelta 'minutes: 2'
concurrency: 50
# sla: !timedelta 'hours: 2'
latest_only: true
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
# Gusty task definition: run `dbt run --full-refresh` inside a GKE pod,
# then save artifacts, deploy docs, and sync Metabase.
operator: operators.PodOperator
name: dbt-run-and-upload-artifacts
image: "ghcr.io/cal-itp/data-infra/warehouse:{{ image_tag() }}"

cmds:
  - python3
arguments:
  - /app/scripts/run_and_upload.py
  - run
  - --full-refresh
  - --dbt-docs
  - --save-artifacts
  - --deploy-docs
  - --sync-metabase
  - --select
  # NOTE(review): with no dag_run conf this expands to an empty string,
  # i.e. `--select ''` — confirm run_and_upload.py tolerates that.
  - "{{ dag_run.conf.get('dbt_select_statement', '') }}"
  - --exclude
  - gtfs_rt_external_tables+

# Pod lifecycle / placement.
is_delete_operator_pod: true
get_logs: true
is_gke: true
pod_location: us-west1
cluster_name: data-infra-apps
namespace: airflow-jobs
priority_class_name: dbt-high-priority

env_vars:
  AIRFLOW_ENV: "{{ env_var('AIRFLOW_ENV') }}"
  CALITP_BUCKET__DBT_ARTIFACTS: "{{ env_var('CALITP_BUCKET__DBT_ARTIFACTS') }}"
  BIGQUERY_KEYFILE_LOCATION: /secrets/jobs-data/service_account.json
  DBT_PROJECT_DIR: /app
  DBT_PROFILE_DIR: /app
  DBT_DATABASE: "{{ get_project_id() }}"
  DBT_TARGET: "{{ env_var('DBT_TARGET') }}"
  MB_HOST: dashboards.calitp.org
  NETLIFY_SITE_ID: cal-itp-dbt-docs
  SENTRY_DSN: "{{ env_var('SENTRY_DSN') }}"
  SENTRY_ENVIRONMENT: "{{ env_var('SENTRY_ENVIRONMENT') }}"

secrets:
  # NOTE(review): the secret key is "service-account.json" (hyphen) while
  # BIGQUERY_KEYFILE_LOCATION above uses "service_account.json" (underscore) —
  # presumably the volume mount renames the file; confirm against the secret spec.
  - deploy_type: volume
    deploy_target: /secrets/jobs-data/
    secret: jobs-data
    key: service-account.json
  - deploy_type: env
    deploy_target: MB_USER
    secret: jobs-data
    key: metabase-user
  - deploy_type: env
    deploy_target: MB_PASSWORD
    secret: jobs-data
    key: metabase-password
  - deploy_type: env
    deploy_target: NETLIFY_AUTH_TOKEN
    secret: jobs-data
    key: netlify-auth-token

k8s_resources:
  request_memory: 2.0Gi
  request_cpu: 1

# Schedule onto the dedicated compute-task node pool.
tolerations:
  - key: pod-role
    operator: Equal
    value: computetask
    effect: NoSchedule

affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: pod-role
              operator: In
              values:
                - computetask
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
# Gusty task definition: run dbt tests only (seed and run disabled) against
# the models selected by the triggering dag_run conf.
operator: operators.PodOperator
name: dbt-test
image: "ghcr.io/cal-itp/data-infra/warehouse:{{ image_tag() }}"

cmds:
  - python3
arguments:
  - /app/scripts/run_and_upload.py
  - run
  - --no-dbt-seed
  - --no-dbt-run
  - --dbt-test
  - --select
  - "{{ dag_run.conf.get('dbt_select_statement', '') }}"

dependencies:
  - dbt_run_and_upload_artifacts
# all_done: run the tests even when the upstream full-refresh run failed.
trigger_rule: all_done

# Pod lifecycle / placement.
is_delete_operator_pod: true
get_logs: true
is_gke: true
pod_location: us-west1
cluster_name: data-infra-apps
namespace: airflow-jobs

env_vars:
  AIRFLOW_ENV: "{{ env_var('AIRFLOW_ENV') }}"
  CALITP_BUCKET__DBT_ARTIFACTS: "{{ env_var('CALITP_BUCKET__DBT_ARTIFACTS') }}"
  BIGQUERY_KEYFILE_LOCATION: /secrets/jobs-data/service_account.json
  DBT_PROJECT_DIR: /app
  DBT_PROFILE_DIR: /app
  DBT_TARGET: "{{ env_var('DBT_TARGET') }}"
  SENTRY_DSN: "{{ env_var('SENTRY_DSN') }}"
  SENTRY_ENVIRONMENT: "{{ env_var('SENTRY_ENVIRONMENT') }}"

secrets:
  - deploy_type: volume
    deploy_target: /secrets/jobs-data/
    secret: jobs-data
    key: service-account.json

k8s_resources:
  request_memory: 2.0Gi
  request_cpu: 1

# Schedule onto the dedicated compute-task node pool.
tolerations:
  - key: pod-role
    operator: Equal
    value: computetask
    effect: NoSchedule

affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: pod-role
              operator: In
              values:
                - computetask

0 commit comments

Comments
 (0)