Skip to content

Commit

Permalink
[state:modified] persist unrendered_config from schema.yml, and more …
Browse files Browse the repository at this point in the history
…reliably compute unrendered_config from .sql files (#10487)
  • Loading branch information
MichelleArk authored Sep 26, 2024
1 parent 1fd4d2e commit b590045
Show file tree
Hide file tree
Showing 22 changed files with 756 additions and 31 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20240925-131028.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: Ignore rendered jinja in configs for state:modified, behind state_modified_compare_more_unrendered_values
behaviour flag
time: 2024-09-25T13:10:28.490042+01:00
custom:
Author: michelleark
Issue: "9564"
3 changes: 3 additions & 0 deletions core/dbt/artifacts/resources/v1/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,16 @@ class ParsedResource(ParsedResourceMandatory):
unrendered_config: Dict[str, Any] = field(default_factory=dict)
created_at: float = field(default_factory=lambda: time.time())
config_call_dict: Dict[str, Any] = field(default_factory=dict)
unrendered_config_call_dict: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
raw_code: str = ""

def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
    """Drop parse-time-only config dictionaries when serializing an artifact.

    The parent class's post-serialization hook runs first. Then, only when
    ``context`` is provided and carries a truthy ``"artifact"`` entry, the
    ``config_call_dict`` and ``unrendered_config_call_dict`` keys are removed
    from the serialized dictionary if they are present.

    Returns the (possibly trimmed) serialized dictionary.
    """
    dct = super().__post_serialize__(dct, context)
    if context and context.get("artifact"):
        for internal_key in ("config_call_dict", "unrendered_config_call_dict"):
            # A missing key is simply skipped.
            dct.pop(internal_key, None)
    return dct


Expand Down
54 changes: 54 additions & 0 deletions core/dbt/clients/jinja_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,57 @@ def statically_parse_ref_or_source(expression: str) -> Union[RefArgs, List[str]]
raise ParsingError(f"Invalid ref or source expression: {expression}")

return ref_or_source


def statically_parse_unrendered_config(string: str) -> Optional[Dict[str, Any]]:
    """
    Statically extract the keyword arguments of a jinja config call, unrendered.

    The input is parsed (never rendered), the first call whose target is named
    ``config`` is located, and each of its keyword arguments is mapped to a
    stringified form of its jinja AST node.

    For example, given:
      "{{ config(materialized=env_var('DBT_TEST_STATE_MODIFIED')) }}\nselect 1 as id"
    returns: {'materialized': "Keyword(key='materialized', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='DBT_TEST_STATE_MODIFIED')], kwargs=[], dyn_args=None, dyn_kwargs=None))"}

    No config call:
      "select 1 as id"
    returns: None
    """
    # Cheap substring check first: avoid creating a jinja environment when the
    # input cannot contain a config call.
    if "config(" not in string:
        return None

    # set 'capture_macros' to capture undefined
    env = get_environment(None, capture_macros=True)

    cache_hit = (
        test_caching_enabled() and _TESTING_MACRO_CACHE and string in _TESTING_MACRO_CACHE
    )
    if cache_hit:
        template = _TESTING_MACRO_CACHE.get(string, None)
        calls = getattr(template, "_dbt_cached_calls")
    else:
        template = env.parse(string)
        calls = tuple(template.find_all(jinja2.nodes.Call))

    def _targets_config(call) -> bool:
        # Only calls whose target node carries the name "config" qualify.
        target = getattr(call, "node", None)
        return getattr(target, "name", None) == "config"

    # There should only be one {{ config(...) }} call per input; use the first one found.
    config_call = next((call for call in calls if _targets_config(call)), None)
    if not config_call:
        return None

    return {
        keyword.key: construct_static_kwarg_value(keyword) for keyword in config_call.kwargs
    }


def construct_static_kwarg_value(kwarg: Any) -> str:
    """Return the string form of a config keyword argument.

    The value is deliberately not re-assembled into the user's original
    input; stringifying it is enough to detect that an unrendered config
    changed, even though the result is not an exact representation of what
    the user wrote.
    """
    stringified = str(kwarg)
    return stringified
20 changes: 19 additions & 1 deletion core/dbt/context/context_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dbt.adapters.factory import get_config_class_by_name
from dbt.config import IsFQNResource, Project, RuntimeConfig
from dbt.contracts.graph.model_config import get_config_for
from dbt.flags import get_flags
from dbt.node_types import NodeType
from dbt.utils import fqn_search
from dbt_common.contracts.config.base import BaseConfig, merge_config_dicts
Expand Down Expand Up @@ -286,6 +287,7 @@ def __init__(
project_name: str,
) -> None:
self._config_call_dict: Dict[str, Any] = {}
self._unrendered_config_call_dict: Dict[str, Any] = {}
self._active_project = active_project
self._fqn = fqn
self._resource_type = resource_type
Expand All @@ -295,6 +297,10 @@ def add_config_call(self, opts: Dict[str, Any]) -> None:
dct = self._config_call_dict
merge_config_dicts(dct, opts)

def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None:
    """Record unrendered config options, overwriting any existing keys.

    A plain key-level update is used: complex merge behaviours cannot be
    applied to unrendered configs because their values may not be of the
    appropriate types.
    """
    unrendered = self._unrendered_config_call_dict
    unrendered.update(opts)

def build_config_dict(
self,
base: bool = False,
Expand All @@ -305,12 +311,24 @@ def build_config_dict(
if rendered:
# TODO CT-211
src = ContextConfigGenerator(self._active_project) # type: ignore[var-annotated]
config_call_dict = self._config_call_dict
else:
# TODO CT-211
src = UnrenderedConfigGenerator(self._active_project) # type: ignore[assignment]

# preserve legacy behaviour - using unreliable (potentially rendered) _config_call_dict
if get_flags().state_modified_compare_more_unrendered_values is False:
config_call_dict = self._config_call_dict
else:
# Prefer _config_call_dict if it is available and _unrendered_config_call_dict is not,
# as _unrendered_config_call_dict is unreliable for non-sql nodes (e.g. no jinja config block rendered for python models, etc)
if self._config_call_dict and not self._unrendered_config_call_dict:
config_call_dict = self._config_call_dict
else:
config_call_dict = self._unrendered_config_call_dict

return src.calculate_node_config_dict(
config_call_dict=self._config_call_dict,
config_call_dict=config_call_dict,
fqn=self._fqn,
resource_type=self._resource_type,
project_name=self._project_name,
Expand Down
10 changes: 10 additions & 0 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
UnitTestMacroGenerator,
get_rendered,
)
from dbt.clients.jinja_static import statically_parse_unrendered_config
from dbt.config import IsFQNResource, Project, RuntimeConfig
from dbt.constants import DEFAULT_ENV_PLACEHOLDER
from dbt.context.base import Var, contextmember, contextproperty
Expand Down Expand Up @@ -78,6 +79,7 @@
SecretEnvVarLocationError,
TargetNotFoundError,
)
from dbt.flags import get_flags
from dbt.materializations.incremental.microbatch import MicrobatchBuilder
from dbt.node_types import ModelLanguage, NodeType
from dbt.utils import MultiDict, args_to_dict
Expand Down Expand Up @@ -395,6 +397,14 @@ def __call__(self, *args, **kwargs):
# not call it!
if self.context_config is None:
raise DbtRuntimeError("At parse time, did not receive a context config")

# Track unrendered opts to build parsed node unrendered_config later on
if get_flags().state_modified_compare_more_unrendered_values:
unrendered_config = statically_parse_unrendered_config(self.model.raw_code)
if unrendered_config:
self.context_config.add_unrendered_config_call(unrendered_config)

# Use rendered opts to populate context_config
self.context_config.add_config_call(opts)
return ""

Expand Down
36 changes: 36 additions & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ class SchemaSourceFile(BaseSourceFile):
# created too, but those are in 'sources'
sop: List[SourceKey] = field(default_factory=list)
env_vars: Dict[str, Any] = field(default_factory=dict)
unrendered_configs: Dict[str, Any] = field(default_factory=dict)
pp_dict: Optional[Dict[str, Any]] = None
pp_test_index: Optional[Dict[str, Any]] = None

Expand Down Expand Up @@ -317,6 +318,41 @@ def get_all_test_ids(self):
test_ids.extend(self.data_tests[key][name])
return test_ids

def add_unrendered_config(self, unrendered_config, yaml_key, name, version=None):
    """Store an unrendered config under ``yaml_key`` for a (possibly versioned) entry.

    The entry is keyed by ``name``, or by ``"{name}_v{version}"`` when a
    version is given. An entry that is already stored is left untouched:
    the first config recorded for a key wins.
    """
    entry_key = name if version is None else f"{name}_v{version}"
    configs_for_key = self.unrendered_configs.setdefault(yaml_key, {})
    configs_for_key.setdefault(entry_key, unrendered_config)

def get_unrendered_config(self, yaml_key, name, version=None) -> Optional[Dict[str, Any]]:
    """Look up the unrendered config stored for a (possibly versioned) entry.

    The entry is keyed by ``name``, or by ``"{name}_v{version}"`` when a
    version is given. Returns ``None`` when nothing is stored for
    ``yaml_key`` or for that entry.
    """
    entry_key = name if version is None else f"{name}_v{version}"
    configs_for_key = self.unrendered_configs.get(yaml_key, {})
    return configs_for_key.get(entry_key)

def delete_from_unrendered_configs(self, yaml_key, name):
    """Remove every stored unrendered config for ``yaml_key``/``name``.

    All entries are deleted because the entry has been scheduled for
    reparsing. This removes the unversioned entry stored under ``name`` as
    well as every versioned entry stored under a ``"{name}_v..."`` key. When
    nothing is left for ``yaml_key`` afterwards, the ``yaml_key`` bucket
    itself is removed.

    The cleanup is safe to call when nothing is stored for ``yaml_key``, and
    it does not depend on an unversioned entry existing or being non-empty:
    versioned entries are purged either way.
    """
    configs_for_key = self.unrendered_configs.get(yaml_key)
    if configs_for_key is None:
        return

    # Unversioned entry, if any (an empty config dict is still an entry).
    configs_for_key.pop(name, None)

    # Delete all versioned keys associated with name. Collect first so the
    # dict is not mutated while it is being iterated.
    # NOTE(review): the prefix match also hits unrelated entries whose own
    # name begins with f"{name}_v" (e.g. "orders_view" when name is "orders");
    # the flat string key format cannot tell these apart -- confirm whether
    # that is acceptable.
    versioned_prefix = f"{name}_v"
    stale_keys = [key for key in configs_for_key if key.startswith(versioned_prefix)]
    for stale_key in stale_keys:
        del configs_for_key[stale_key]

    if not configs_for_key:
        del self.unrendered_configs[yaml_key]

def add_env_var(self, var, yaml_key, name):
if yaml_key not in self.env_vars:
self.env_vars[yaml_key] = {}
Expand Down
4 changes: 3 additions & 1 deletion core/dbt/contracts/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,17 +337,19 @@ class ProjectFlags(ExtensibleDbtClassMixin):
warn_error_options: Optional[Dict[str, Union[str, List[str]]]] = None
write_json: Optional[bool] = None

# legacy behaviors
# legacy behaviors - https://github.com/dbt-labs/dbt-core/blob/main/docs/guides/behavior-change-flags.md
require_explicit_package_overrides_for_builtin_materializations: bool = True
require_resource_names_without_spaces: bool = False
source_freshness_run_project_hooks: bool = False
state_modified_compare_more_unrendered_values: bool = False

@property
def project_only_flags(self) -> Dict[str, Any]:
    """Return the current values of the legacy-behavior flags, keyed by flag name."""
    flag_names = (
        "require_explicit_package_overrides_for_builtin_materializations",
        "require_resource_names_without_spaces",
        "source_freshness_run_project_hooks",
        "state_modified_compare_more_unrendered_values",
    )
    return {flag_name: getattr(self, flag_name) for flag_name in flag_names}


Expand Down
18 changes: 18 additions & 0 deletions core/dbt/parser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
generate_generate_name_macro_context,
generate_parser_model_context,
)
from dbt.contracts.files import SchemaSourceFile
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.nodes import BaseNode, ManifestNode
from dbt.contracts.graph.unparsed import Docs, UnparsedNode
Expand All @@ -22,9 +23,12 @@
DictParseError,
InvalidAccessTypeError,
)
from dbt.flags import get_flags
from dbt.node_types import AccessType, ModelLanguage, NodeType
from dbt.parser.common import resource_types_to_schema_file_keys
from dbt.parser.search import FileBlock
from dbt_common.dataclass_schema import ValidationError
from dbt_common.utils import deep_merge

# internally, the parser may store a less-restrictive type that will be
# transformed into the final type. But it will have to be derived from
Expand Down Expand Up @@ -308,6 +312,7 @@ def update_parsed_node_config(
config: ContextConfig,
context=None,
patch_config_dict=None,
patch_file_id=None,
) -> None:
"""Given the ContextConfig used for parsing and the parsed node,
generate and set the true values to use, overriding the temporary parse
Expand Down Expand Up @@ -369,13 +374,26 @@ def update_parsed_node_config(
if hasattr(parsed_node, "contract"):
parsed_node.contract = Contract.from_dict(contract_dct)

if get_flags().state_modified_compare_more_unrendered_values:
# Use the patch_file.unrendered_configs if available to update patch_config_dict,
# as provided patch_config_dict may actually already be rendered and thus sensitive to jinja evaluations
if patch_file_id:
patch_file = self.manifest.files.get(patch_file_id, None)
if patch_file and isinstance(patch_file, SchemaSourceFile):
schema_key = resource_types_to_schema_file_keys[parsed_node.resource_type]
if unrendered_patch_config := patch_file.get_unrendered_config(
schema_key, parsed_node.name, getattr(parsed_node, "version", None)
):
patch_config_dict = deep_merge(patch_config_dict, unrendered_patch_config)

# unrendered_config is used to compare the original database/schema/alias
# values and to handle 'same_config' and 'same_contents' calls
parsed_node.unrendered_config = config.build_config_dict(
rendered=False, patch_config_dict=patch_config_dict
)

parsed_node.config_call_dict = config._config_call_dict
parsed_node.unrendered_config_call_dict = config._unrendered_config_call_dict

# do this once before we parse the node database/schema/alias, so
# parsed_node.config is what it would be if they did nothing
Expand Down
20 changes: 20 additions & 0 deletions core/dbt/parser/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,31 @@
UnparsedSingularTestUpdate,
)
from dbt.exceptions import ParsingError
from dbt.node_types import NodeType
from dbt.parser.search import FileBlock
from dbt_common.contracts.constraints import ColumnLevelConstraint, ConstraintType
from dbt_common.exceptions import DbtInternalError
from dbt_semantic_interfaces.type_enums import TimeGranularity

# Top-level schema file key -> the resource type it is paired with.
schema_file_keys_to_resource_types = {
    "models": NodeType.Model,
    "seeds": NodeType.Seed,
    "snapshots": NodeType.Snapshot,
    "sources": NodeType.Source,
    "macros": NodeType.Macro,
    "analyses": NodeType.Analysis,
    "exposures": NodeType.Exposure,
    "metrics": NodeType.Metric,
    "semantic_models": NodeType.SemanticModel,
    "saved_queries": NodeType.SavedQuery,
}

# Reverse lookup: resource type -> schema file key.
resource_types_to_schema_file_keys = {
    resource_type: file_key
    for file_key, resource_type in schema_file_keys_to_resource_types.items()
}

# All known schema file keys, in the declaration order of the mapping above.
schema_file_keys = list(schema_file_keys_to_resource_types)


def trimmed(inp: str) -> str:
if len(inp) < 50:
Expand Down
1 change: 1 addition & 0 deletions core/dbt/parser/partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,7 @@ def merge_patch(self, schema_file, key, patch, new_patch=False):
pp_dict[key].append(patch)

schema_file.delete_from_env_vars(key, patch["name"])
schema_file.delete_from_unrendered_configs(key, patch["name"])
self.add_to_pp_files(schema_file)

# For model, seed, snapshot, analysis schema dictionary keys,
Expand Down
3 changes: 2 additions & 1 deletion core/dbt/parser/read_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
)
from dbt.events.types import InputFileDiffError
from dbt.exceptions import ParsingError
from dbt.parser.schemas import schema_file_keys, yaml_from_file
from dbt.parser.common import schema_file_keys
from dbt.parser.schemas import yaml_from_file
from dbt.parser.search import filesystem_search
from dbt_common.clients.system import load_file_contents
from dbt_common.dataclass_schema import dbtClassMixin
Expand Down
Loading

0 comments on commit b590045

Please sign in to comment.