Skip to content

Commit 251785a

Browse files
Feature/performance enhancement (#127)
* feature/performance-enhancement
1 parent ec9fec4 commit 251785a

27 files changed

+329
-369
lines changed

.buildkite/hooks/pre-command

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,6 @@ export CI_SNOWFLAKE_DBT_WAREHOUSE=$(gcloud secrets versions access latest --secr
2222
export CI_DATABRICKS_DBT_HOST=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_HOST" --project="dbt-package-testing-363917")
2323
export CI_DATABRICKS_DBT_HTTP_PATH=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_HTTP_PATH" --project="dbt-package-testing-363917")
2424
export CI_DATABRICKS_DBT_TOKEN=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_TOKEN" --project="dbt-package-testing-363917")
25-
export CI_DATABRICKS_DBT_CATALOG=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_CATALOG" --project="dbt-package-testing-363917")
25+
export CI_DATABRICKS_DBT_CATALOG=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_CATALOG" --project="dbt-package-testing-363917")
26+
export CI_DATABRICKS_SQL_DBT_HTTP_PATH=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_SQL_DBT_HTTP_PATH" --project="dbt-package-testing-363917")
27+
export CI_DATABRICKS_SQL_DBT_TOKEN=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_SQL_DBT_TOKEN" --project="dbt-package-testing-363917")

.buildkite/pipeline.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,18 @@ steps:
7272
- "CI_DATABRICKS_DBT_CATALOG"
7373
commands: |
7474
bash .buildkite/scripts/run_models.sh databricks
75+
76+
- label: ":databricks: :database: Run Tests - Databricks SQL Warehouse"
77+
key: "run_dbt_databricks_sql"
78+
plugins:
79+
- docker#v3.13.0:
80+
image: "python:3.8"
81+
shell: [ "/bin/bash", "-e", "-c" ]
82+
environment:
83+
- "BASH_ENV=/tmp/.bashrc"
84+
- "CI_DATABRICKS_DBT_HOST"
85+
- "CI_DATABRICKS_SQL_DBT_HTTP_PATH"
86+
- "CI_DATABRICKS_SQL_DBT_TOKEN"
87+
- "CI_DATABRICKS_DBT_CATALOG"
88+
commands: |
89+
bash .buildkite/scripts/run_models.sh databricks-sql

.buildkite/scripts/run_models.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,26 @@ db=$1
1616
echo `pwd`
1717
cd integration_tests
1818
dbt deps
19+
20+
if [ "$db" = "databricks-sql" ]; then
21+
dbt seed --vars '{jira_schema: jira_integrations_tests_sqlw}' --target "$db" --full-refresh
22+
dbt compile --vars '{jira_schema: jira_integrations_tests_sqlw}' --target "$db"
23+
dbt run --vars '{jira_schema: jira_integrations_tests_sqlw}' --target "$db" --full-refresh
24+
dbt run --vars '{jira_schema: jira_integrations_tests_sqlw}' --target "$db"
25+
dbt test --vars '{jira_schema: jira_integrations_tests_sqlw}' --target "$db"
26+
dbt run --vars "{jira_schema: jira_integrations_tests_sqlw, jira_using_priorities: false, jira_using_sprints: false, jira_using_components: false, jira_using_versions: false, jira_field_grain: 'field_name'}" --target "$db" --full-refresh
27+
dbt run --vars "{jira_schema: jira_integrations_tests_sqlw, jira_using_priorities: false, jira_using_sprints: false, jira_using_components: false, jira_using_versions: false, jira_field_grain: 'field_name'}" --target "$db"
28+
dbt test --vars '{jira_schema: jira_integrations_tests_sqlw}' --target "$db"
29+
30+
else
1931
dbt seed --target "$db" --full-refresh
32+
dbt compile --target "$db"
2033
dbt run --target "$db" --full-refresh
2134
dbt run --target "$db"
2235
dbt test --target "$db"
2336
dbt run --vars "{jira_using_priorities: false, jira_using_sprints: false, jira_using_components: false, jira_using_versions: false, jira_field_grain: 'field_name'}" --target "$db" --full-refresh
37+
dbt run --vars "{jira_using_priorities: false, jira_using_sprints: false, jira_using_components: false, jira_using_versions: false, jira_field_grain: 'field_name'}" --target "$db"
2438
dbt test --target "$db"
39+
fi
40+
2541
dbt run-operation fivetran_utils.drop_schemas_automation --target "$db"

.github/PULL_REQUEST_TEMPLATE/maintainer_pull_request_template.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ Please acknowledge that you have successfully performed the following commands l
1414
- [ ] dbt run (if incremental models are present) && dbt test
1515

1616
Before marking this PR as "ready for review" the following have been applied:
17-
- [ ] The appropriate issue has been linked, tagged, and properly assigned
18-
- [ ] All necessary documentation and version upgrades have been applied
19-
- [ ] docs were regenerated (unless this PR does not include any code or yml updates)
20-
- [ ] BuildKite integration tests are passing
21-
- [ ] Detailed validation steps have been provided below
17+
- [ ] The appropriate issue has been linked, tagged, and properly assigned.
18+
- [ ] All necessary documentation and version upgrades have been applied.
19+
<!--- Be sure to update the package version in the dbt_project.yml, integration_tests/dbt_project.yml, and README if necessary. -->
20+
- [ ] docs were regenerated (unless this PR does not include any code or yml updates).
21+
- [ ] BuildKite integration tests are passing.
22+
- [ ] Detailed validation steps have been provided below.
2223

2324
### Detailed Validation
2425
Please share any and all of your validation steps:

CHANGELOG.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,35 @@
1+
# dbt_jira v0.17.0
2+
[PR #127](https://github.com/fivetran/dbt_jira/pull/127) contains the following updates:
3+
4+
## 🚨 Breaking Changes 🚨
5+
> ⚠️ Since the following changes are breaking, a `--full-refresh` after upgrading will be required.
6+
- To reduce storage, updated the default materialization of the upstream staging models to views. (See the [dbt_jira_source CHANGELOG](https://github.com/fivetran/dbt_jira_source/blob/main/CHANGELOG.md#dbt_jira_source-v070) for more details.)
7+
8+
## Performance improvements (🚨 Breaking Changes 🚨)
9+
- Updated the incremental strategy of the following models to `insert_overwrite` for BigQuery and Databricks All Purpose Cluster destinations and `delete+insert` for all other supported destinations.
10+
- `int_jira__issue_calendar_spine`
11+
- `int_jira__pivot_daily_field_history`
12+
- `jira__daily_issue_field_history`
13+
> At this time, models for Databricks SQL Warehouse destinations are materialized as tables without support for incremental runs.
14+
15+
- Removed intermediate models `int_jira__agg_multiselect_history`, `int_jira__combine_field_histories`, and `int_jira__daily_field_history` by combining them with `int_jira__pivot_daily_field_history`. This is to reduce the redundancy of the data stored in tables, the number of full scans, and the volume of write operations.
16+
- Note that if you have previously run this package, these models may still exist in your destination schema, however they will no longer be updated.
17+
- Updated the default materialization of `int_jira__issue_type_parents` from a table to a view. This model is called only in `int_jira__issue_users`, so a view will reduce storage requirements while not significantly hindering performance.
18+
- For Snowflake and BigQuery destinations, added the following `cluster_by` columns to the configs for incremental models:
19+
- `int_jira__issue_calendar_spine` clustering on columns `['date_day', 'issue_id']`
20+
- `int_jira__pivot_daily_field_history` clustering on columns `['valid_starting_on', 'issue_id']`
21+
- `jira__daily_issue_field_history` clustering on columns `['date_day', 'issue_id']`
22+
- For Databricks All Purpose Cluster destinations, updated incremental model file formats to `parquet` for compatibility with the `insert_overwrite` strategy.
23+
24+
## Features
25+
- Added a default 3-day look-back to incremental models to accommodate late arriving records. The number of days can be changed by setting the var `lookback_window` in your dbt_project.yml. See the [Lookback Window section of the README](https://github.com/fivetran/dbt_jira/blob/main/README.md#lookback-window) for more details.
26+
- Added macro `jira_lookback` to streamline the lookback window calculation.
27+
28+
## Under the Hood
29+
- Added integration testing pipeline for Databricks SQL Warehouse.
30+
- Added macro `jira_is_databricks_sql_warehouse` for detecting if a Databricks target is an All Purpose Cluster or a SQL Warehouse.
31+
- Updated the maintainer pull request template.
32+
133
# dbt_jira v0.16.0
234
[PR #122](https://github.com/fivetran/dbt_jira/pull/122) contains the following updates:
335

README.md

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,23 @@ dispatch:
5050
search_order: ['spark_utils', 'dbt_utils']
5151
```
5252
53+
### Database Incremental Strategies
54+
Models in this package that are materialized incrementally are configured to work with the different strategies available to each supported warehouse.
55+
56+
For **BigQuery** and **Databricks All Purpose Cluster runtime** destinations, we have chosen `insert_overwrite` as the default strategy, which benefits from the partitioning capability.
57+
> For Databricks SQL Warehouse destinations, models are materialized as tables without support for incremental runs.
58+
59+
For **Snowflake**, **Redshift**, and **Postgres** databases, we have chosen `delete+insert` as the default strategy.
60+
61+
> Regardless of strategy, we recommend that users periodically run a `--full-refresh` to ensure a high level of data quality.
62+
5363
## Step 2: Install the package
5464
Include the following jira package version in your `packages.yml` file:
5565
> TIP: Check [dbt Hub](https://hub.getdbt.com/) for the latest installation instructions or [read the dbt docs](https://docs.getdbt.com/docs/package-management) for more information on installing packages.
5666
```yaml
5767
packages:
5868
- package: fivetran/jira
59-
version: [">=0.16.0", "<0.17.0"]
69+
version: [">=0.17.0", "<0.18.0"]
6070
6171
```
6272
## Step 3: Define database and schema variables
@@ -131,6 +141,17 @@ vars:
131141
jira_<default_source_table_name>_identifier: your_table_name
132142
```
133143

144+
### Lookback Window
145+
Records from the source can sometimes arrive late. Since several of the models in this package are incremental, by default we look back 3 days to ensure late arrivals are captured while avoiding the need for frequent full refreshes. While the frequency can be reduced, we still recommend running `dbt run --full-refresh` periodically to maintain the data quality of the models.
146+
147+
To change the default lookback window, add the following variable to your `dbt_project.yml` file:
148+
149+
```yml
150+
vars:
151+
jira:
152+
lookback_window: number_of_days # default is 3
153+
```
154+
134155
## (Optional) Step 6: Orchestrate your models with Fivetran Transformations for dbt Core™
135156
<details><summary>Expand for details</summary>
136157
<br>
@@ -145,7 +166,7 @@ This dbt package is dependent on the following dbt packages. Please be aware tha
145166
```yml
146167
packages:
147168
- package: fivetran/jira_source
148-
version: [">=0.6.0", "<0.7.0"]
169+
version: [">=0.7.0", "<0.8.0"]
149170
150171
- package: fivetran/fivetran_utils
151172
version: [">=0.4.0", "<0.5.0"]

dbt_project.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: 'jira'
2-
version: '0.16.0'
2+
version: '0.17.0'
33
config-version: 2
44
require-dbt-version: [">=1.3.0", "<2.0.0"]
55
vars:

docs/catalog.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/manifest.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/run_results.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)