From 35f9f398b607feaac99b024e89c284d3b736513f Mon Sep 17 00:00:00 2001
From: Andreas Motl
Date: Sun, 22 Dec 2024 00:09:55 +0100
Subject: [PATCH] dbt: Add example project using `delete+insert` materialization

Ephemeral and incremental materialization using the `delete+insert`
strategy.
---
Reviewer note: README.md, models/example/test.sql and tests/test.yml were
edited in review (line counts unchanged), so the blob ids on their `index`
lines no longer match the content below.

 .github/dependabot.yml                        |  5 ++++
 .github/workflows/framework-dbt.yml           |  8 ++++++
 framework/dbt/materialize/.gitignore          |  5 ++++
 framework/dbt/materialize/README.md           | 25 +++++++++++++++++++
 framework/dbt/materialize/dbt_project.yml     | 12 +++++++++
 .../materialize/models/example/jobslog.sql    |  3 +++
 .../dbt/materialize/models/example/test.sql   | 12 +++++++++
 framework/dbt/materialize/packages.yml        |  3 +++
 framework/dbt/materialize/profiles.yml        | 12 +++++++++
 framework/dbt/materialize/requirements.txt    |  1 +
 framework/dbt/materialize/tests/test.yml      | 25 +++++++++++++++++++
 11 files changed, 111 insertions(+)
 create mode 100644 framework/dbt/materialize/.gitignore
 create mode 100644 framework/dbt/materialize/README.md
 create mode 100644 framework/dbt/materialize/dbt_project.yml
 create mode 100644 framework/dbt/materialize/models/example/jobslog.sql
 create mode 100644 framework/dbt/materialize/models/example/test.sql
 create mode 100644 framework/dbt/materialize/packages.yml
 create mode 100644 framework/dbt/materialize/profiles.yml
 create mode 100644 framework/dbt/materialize/requirements.txt
 create mode 100644 framework/dbt/materialize/tests/test.yml

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 444a5c5c..49217b23 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -112,6 +112,11 @@ updates:
     schedule:
       interval: "daily"
 
+  - directory: "/framework/dbt/materialize"
+    package-ecosystem: "pip"
+    schedule:
+      interval: "daily"
+
   - directory: "/framework/gradio"
     package-ecosystem: "pip"
     schedule:
diff --git a/.github/workflows/framework-dbt.yml b/.github/workflows/framework-dbt.yml
index 871910f0..1dcf9458 100644
--- a/.github/workflows/framework-dbt.yml
+++ b/.github/workflows/framework-dbt.yml
@@ -69,3 +69,11 @@ jobs:
           pip install -r requirements.txt
           dbt run --debug
           dbt test --debug
+
+      - name: Validate framework/dbt/materialize
+        run: |
+          cd framework/dbt/materialize
+          pip install -r requirements.txt
+          dbt deps
+          dbt run --debug
+          dbt test --debug
diff --git a/framework/dbt/materialize/.gitignore b/framework/dbt/materialize/.gitignore
new file mode 100644
index 00000000..08c48be9
--- /dev/null
+++ b/framework/dbt/materialize/.gitignore
@@ -0,0 +1,5 @@
+.user.yml
+target/
+dbt_packages/
+logs/
+package-lock.yml
diff --git a/framework/dbt/materialize/README.md b/framework/dbt/materialize/README.md
new file mode 100644
index 00000000..2c471338
--- /dev/null
+++ b/framework/dbt/materialize/README.md
@@ -0,0 +1,25 @@
+# dbt CrateDB example about model materialization
+
+## What's Inside
+Ephemeral and incremental materialization using the `delete+insert`
+strategy.
+
+## Setup
+```shell
+uv pip install -r requirements.txt
+dbt deps
+```
+
+## Usage
+Try running the following commands:
+- `dbt run`
+- `dbt test`
+
+Optionally, use `--debug` to display executed SQL statements.
+
+## Resources
+- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
+- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
+- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
+- Find [dbt events](https://events.getdbt.com) near you
+- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
diff --git a/framework/dbt/materialize/dbt_project.yml b/framework/dbt/materialize/dbt_project.yml
new file mode 100644
index 00000000..94a3d157
--- /dev/null
+++ b/framework/dbt/materialize/dbt_project.yml
@@ -0,0 +1,12 @@
+# Project name and metadata.
+name: 'testdrive_materialized'
+version: '1.0.0'
+config-version: 2
+
+# Configure connection profile dbt uses for this project.
+profile: 'cratedb_localhost'
+
+# Directories to be removed by `dbt clean`.
+clean-targets:
+  - "target"
+  - "dbt_packages"
diff --git a/framework/dbt/materialize/models/example/jobslog.sql b/framework/dbt/materialize/models/example/jobslog.sql
new file mode 100644
index 00000000..36c92222
--- /dev/null
+++ b/framework/dbt/materialize/models/example/jobslog.sql
@@ -0,0 +1,3 @@
+{{ config(materialized='ephemeral') }}
+
+select * from sys.jobs_log
diff --git a/framework/dbt/materialize/models/example/test.sql b/framework/dbt/materialize/models/example/test.sql
new file mode 100644
index 00000000..a13b0d6d
--- /dev/null
+++ b/framework/dbt/materialize/models/example/test.sql
@@ -0,0 +1,12 @@
+{{ config(materialized='incremental',
+    unique_key='id',
+    incremental_strategy='delete+insert'
+    )
+}}
+
+select * from {{ ref('jobslog') }}
+{% if is_incremental() %}
+{# Re-read jobs started at or after the newest loaded row; delete+insert replaces rows by `id`. #}
+    where started >= (select max(started) from {{ this }})
+
+{% endif %}
diff --git a/framework/dbt/materialize/packages.yml b/framework/dbt/materialize/packages.yml
new file mode 100644
index 00000000..39f82d4d
--- /dev/null
+++ b/framework/dbt/materialize/packages.yml
@@ -0,0 +1,3 @@
+packages:
+  - package: dbt-labs/dbt_utils
+    version: 1.3.0
diff --git a/framework/dbt/materialize/profiles.yml b/framework/dbt/materialize/profiles.yml
new file mode 100644
index 00000000..87e68f08
--- /dev/null
+++ b/framework/dbt/materialize/profiles.yml
@@ -0,0 +1,12 @@
+cratedb_localhost:
+  outputs:
+    dev:
+      type: cratedb
+      host: localhost
+      user: crate
+      pass: crate
+      port: 5432
+      dbname: crate
+      schema: doc
+      catalog: crate
+  target: dev
diff --git a/framework/dbt/materialize/requirements.txt b/framework/dbt/materialize/requirements.txt
new file mode 100644
index 00000000..21bf3dc9
--- /dev/null
+++ b/framework/dbt/materialize/requirements.txt
@@ -0,0 +1 @@
+dbt-cratedb2>=0.0.1
diff --git a/framework/dbt/materialize/tests/test.yml b/framework/dbt/materialize/tests/test.yml
new file mode 100644
index 00000000..84fd7d2f
--- /dev/null
+++ b/framework/dbt/materialize/tests/test.yml
@@ -0,0 +1,25 @@
+models:
+  - name: test
+    columns:
+      - name: id
+        data_tests:
+          - unique
+          - not_null
+      - name: started
+        data_tests:
+          - unique
+          - not_null
+          # min_value looks like epoch milliseconds (2024-12-21T21:15:33.815Z) - TODO confirm.
+          # max_value is handed to the database as a SQL expression, not a literal.
+          - dbt_utils.accepted_range:
+              min_value: 1734815733815
+              max_value: "now()"
+      - name: classification['type']
+        data_tests:
+          - accepted_values:
+              values: ['SELECT', 'INSERT', 'DELETE', 'DDL', 'MANAGEMENT', 'UNDEFINED']
+
+      #- name: classification['labels']
+      #  data_tests:
+      #    - accepted_values:
+      #        values: [[], ["Collect"], ["Collect", "Order"], ["Collect", "Union"], ["Collect", "GroupHashAggregate"], ["Collect", "GroupHashAggregate", "Order"], ["InsertFromValues"], ["TableFunction"]]