From 26eb7726da05ec54bcf6726a65d60c56bd3efe66 Mon Sep 17 00:00:00 2001
From: the4thamigo-uk <7022874+the4thamigo-uk@users.noreply.github.com>
Date: Thu, 21 Nov 2024 12:18:06 +0000
Subject: [PATCH] Prevent data loss when updating materialized view (#383)

---
Review notes (text between the "---" line and the diffstat is commentary,
not part of the commit message).

Changes made during review; both leave every hunk's line counts as they were:

  1. Removed the ";" that followed each macro call in clickhouse__update_mv,
     clickhouse__drop_mvs, clickhouse__create_mvs and clickhouse__update_mvs.
     In the removed code the ";" sat inside a `call statement` block. With
     this patch clickhouse__drop_mv, clickhouse__create_mv and
     clickhouse__modify_mv open their own `call statement` block, so the ";"
     was left outside any statement.
  2. Reworded the comment above the clickhouse__update_mvs call.
     clickhouse__update_mv does not fall back after a failed alter; it picks
     one path from the result of adapter.get_relation.

Macro summary, as implemented below:

  clickhouse__drop_mv(mv_relation, cluster_clause)
      Runs `drop view if exists <mv_relation> <cluster_clause>` in a
      statement named 'drop existing mv: ' + mv_relation.name.
  clickhouse__create_mv(mv_relation, target_relation, cluster_clause, view_sql)
      Runs `create materialized view if not exists <mv_relation>
      <cluster_clause> to <target_relation> as <view_sql>` in a statement
      named 'create existing mv: ' + mv_relation.name.
  clickhouse__modify_mv(mv_relation, cluster_clause, view_sql)
      Runs `alter table <mv_relation> <cluster_clause> modify query
      <view_sql>` in a statement named 'modify existing mv: ' +
      mv_relation.name.
  clickhouse__update_mv(mv_relation, target_relation, cluster_clause, view_sql)
      Looks the mv up with adapter.get_relation. If a relation is returned it
      calls clickhouse__modify_mv; otherwise it calls clickhouse__drop_mv and
      then clickhouse__create_mv.
  clickhouse__drop_mvs / clickhouse__create_mvs / clickhouse__update_mvs
      (target_relation, cluster_clause, views)
      Loop over `views`, build each mv relation with
      target_relation.derivative('_' + view, 'materialized_view') and call
      the matching single-mv macro.

Indentation was restored by hand from a whitespace-collapsed copy of this
patch and may not match the repository byte for byte. The `index` lines still
carry the blob ids of the original submission.

 CHANGELOG.md                                  |  5 ++
 .../materializations/materialized_view.sql    | 67 +++++++++++++------
 2 files changed, 53 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0e23f02c..c0f584c5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+### Unreleased
+
+### Improvement
+* Materialized view now attempts to use `ALTER TABLE...MODIFY QUERY` to update existing materialized views. This is an atomic operation so data is not lost. ([#390](https://github.com/ClickHouse/dbt-clickhouse/pull/390))
+
 ### Release [1.8.5], 2024-11-19
 
 ### New Features
diff --git a/dbt/include/clickhouse/macros/materializations/materialized_view.sql b/dbt/include/clickhouse/macros/materializations/materialized_view.sql
index 4583918b..e219ec0e 100644
--- a/dbt/include/clickhouse/macros/materializations/materialized_view.sql
+++ b/dbt/include/clickhouse/macros/materializations/materialized_view.sql
@@ -84,19 +84,25 @@
     {% else %}
       {{ log('No existing mvs found matching the pattern. continuing..', info=True) }}
     {% endif %}
-    {{ clickhouse__drop_mvs(target_relation, cluster_clause, views) }}
     {% if should_full_refresh() %}
+      {{ clickhouse__drop_mvs(target_relation, cluster_clause, views) }}
+
       {% call statement('main') -%}
         {{ get_create_table_as_sql(False, backup_relation, sql) }}
       {%- endcall %}
       {% do exchange_tables_atomic(backup_relation, existing_relation) %}
+
+      {{ clickhouse__create_mvs(existing_relation, cluster_clause, views) }}
     {% else %}
       -- we need to have a 'main' statement
       {% call statement('main') -%}
         select 1
       {%- endcall %}
+
+      -- alter each existing mv in place to replace its sql; mvs that do not exist yet are created
+      {{ clickhouse__update_mvs(target_relation, cluster_clause, views) }}
+
     {% endif %}
-    {{ clickhouse__create_mvs(existing_relation, cluster_clause, views) }}
   {% else %}
     {{ log('Replacing existing materialized view ' + target_relation.name) }}
     {{ clickhouse__replace_mv(target_relation, existing_relation, intermediate_relation, backup_relation, sql, views) }}
@@ -142,31 +148,54 @@
 {%- endmacro %}
 
 {% macro clickhouse__drop_mv(mv_relation, cluster_clause) -%}
+  {% call statement('drop existing mv: ' + mv_relation.name) -%}
   drop view if exists {{ mv_relation }} {{ cluster_clause }}
-{%- endmacro %}u
+  {% endcall %}
+{%- endmacro %}
 
-{% macro clickhouse__create_mv(mv_relation, target_table, cluster_clause, sql) -%}
-  create materialized view if not exists {{ mv_relation }} {{ cluster_clause }}
-  to {{ target_table }}
-  as {{ sql }}
+{% macro clickhouse__create_mv(mv_relation, target_relation, cluster_clause, view_sql) -%}
+  {% call statement('create existing mv: ' + mv_relation.name) -%}
+    create materialized view if not exists {{ mv_relation }} {{ cluster_clause }}
+    to {{ target_relation }}
+    as {{ view_sql }}
+  {% endcall %}
+{%- endmacro %}
+
+{% macro clickhouse__modify_mv(mv_relation, cluster_clause, view_sql) -%}
+  {% call statement('modify existing mv: ' + mv_relation.name) -%}
+    alter table {{ mv_relation }} {{ cluster_clause }} modify query {{ view_sql }}
+  {% endcall %}
+{%- endmacro %}
+
+{% macro clickhouse__update_mv(mv_relation, target_relation, cluster_clause, view_sql) -%}
+  {% set existing_relation = adapter.get_relation(database=mv_relation.database, schema=mv_relation.schema, identifier=mv_relation.identifier) %}
+  {% if existing_relation %}
+    {{ clickhouse__modify_mv(mv_relation, cluster_clause, view_sql) }}
+  {% else %}
+    {{ clickhouse__drop_mv(mv_relation, cluster_clause) }}
+    {{ clickhouse__create_mv(mv_relation, target_relation, cluster_clause, view_sql) }}
+  {% endif %}
 {%- endmacro %}
 
 {% macro clickhouse__drop_mvs(target_relation, cluster_clause, views) -%}
-    {% for view in views.keys() %}
-        {%- set mv_relation = target_relation.derivative('_' + view, 'materialized_view') -%}
-        {% call statement('drop existing mv: ' + view) -%}
-            {{ clickhouse__drop_mv(mv_relation, cluster_clause) }};
-        {% endcall %}
-    {% endfor %}
+  {% for view in views.keys() %}
+    {%- set mv_relation = target_relation.derivative('_' + view, 'materialized_view') -%}
+    {{ clickhouse__drop_mv(mv_relation, cluster_clause) }}
+  {% endfor %}
 {%- endmacro %}
 
 {% macro clickhouse__create_mvs(target_relation, cluster_clause, views) -%}
-    {% for view, view_sql in views.items() %}
-        {%- set mv_relation = target_relation.derivative('_' + view, 'materialized_view') -%}
-        {% call statement('create existing mv: ' + view) -%}
-            {{ clickhouse__create_mv(mv_relation, target_relation, cluster_clause, view_sql) }};
-        {% endcall %}
-    {% endfor %}
+  {% for view, view_sql in views.items() %}
+    {%- set mv_relation = target_relation.derivative('_' + view, 'materialized_view') -%}
+    {{ clickhouse__create_mv(mv_relation, target_relation, cluster_clause, view_sql) }}
+  {% endfor %}
+{%- endmacro %}
+
+{% macro clickhouse__update_mvs(target_relation, cluster_clause, views) -%}
+  {% for view, view_sql in views.items() %}
+    {%- set mv_relation = target_relation.derivative('_' + view, 'materialized_view') -%}
+    {{ clickhouse__update_mv(mv_relation, target_relation, cluster_clause, view_sql) }}
+  {% endfor %}
 {%- endmacro %}
 
 {% macro clickhouse__replace_mv(target_relation, existing_relation, intermediate_relation, backup_relation, sql, views) %}