diff --git a/.semversioner/next-release/patch-20260512161909030693.json b/.semversioner/next-release/patch-20260512161909030693.json new file mode 100644 index 0000000000..3cc57c21de --- /dev/null +++ b/.semversioner/next-release/patch-20260512161909030693.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Preserve bare `$` characters in config values (e.g. `file_pattern: \".*\\\\.md$\"`) by using `Template.safe_substitute`; missing env vars still raise `ConfigParsingError`." +} diff --git a/packages/graphrag-common/graphrag_common/config/load_config.py b/packages/graphrag-common/graphrag_common/config/load_config.py index c8929149f1..216bb50242 100644 --- a/packages/graphrag-common/graphrag_common/config/load_config.py +++ b/packages/graphrag-common/graphrag_common/config/load_config.py @@ -83,12 +83,21 @@ def _get_parser_for_file(file_path: str | Path) -> Callable[[str], dict[str, Any def _parse_env_variables(text: str) -> str: - """Parse environment variables in the configuration text.""" - try: - return Template(text).substitute(os.environ) - except KeyError as error: - msg = f"Environment variable not found: {error}" - raise ConfigParsingError(msg) from error + r"""Parse environment variables in the configuration text. + + Uses ``Template.safe_substitute`` so that bare ``$`` characters appearing in + legitimate config values (for example, regex anchors like ``.*\.md$`` in a + ``file_pattern``) are passed through unchanged instead of raising a + ``ValueError`` for invalid placeholders. Missing environment variables are + still detected up-front and surfaced as ``ConfigParsingError``. + """ + template = Template(text) + for match in template.pattern.finditer(text): + name = match.group("named") or match.group("braced") + if name is not None and name not in os.environ: + msg = f"Environment variable not found: '{name}'" + raise ConfigParsingError(msg) + return template.safe_substitute(os.environ) def _recursive_merge_dicts(dest: dict[str, Any], src: dict[str, Any]) -> None: diff --git a/tests/unit/load_config/fixtures/config_with_dollar.yaml b/tests/unit/load_config/fixtures/config_with_dollar.yaml new file mode 100644 index 0000000000..bd1a818899 --- /dev/null +++ b/tests/unit/load_config/fixtures/config_with_dollar.yaml @@ -0,0 +1,10 @@ +name: "match_dollar_$" +value: 100 +nested: + nested_str: ".*\\.md$" + nested_int: 42 +nested_list: + - nested_str: "list_value_1" + nested_int: 7 + - nested_str: "trailing$" + nested_int: 8 diff --git a/tests/unit/load_config/fixtures/config_with_dollar_and_env.yaml b/tests/unit/load_config/fixtures/config_with_dollar_and_env.yaml new file mode 100644 index 0000000000..f2da9202ed --- /dev/null +++ b/tests/unit/load_config/fixtures/config_with_dollar_and_env.yaml @@ -0,0 +1,10 @@ +name: ${LOAD_CONFIG_NAME} +value: 100 +nested: + nested_str: ".*\\.md$" + nested_int: 42 +nested_list: + - nested_str: "list_value_1" + nested_int: 7 + - nested_str: "list_value_2" + nested_int: 8 diff --git a/tests/unit/load_config/test_load_config.py b/tests/unit/load_config/test_load_config.py index 0945cf214f..e8e2ce40f2 100644 --- a/tests/unit/load_config/test_load_config.py +++ b/tests/unit/load_config/test_load_config.py @@ -155,3 +155,43 @@ def test_load_config(): assert config_with_env_vars.nested_list[0].nested_int == 7 assert config_with_env_vars.nested_list[1].nested_str == "list_value_2" assert config_with_env_vars.nested_list[1].nested_int == 8 + + +def test_load_config_preserves_literal_dollar_signs(): + """Bare ``$`` characters in config values must be preserved verbatim. + + Regex anchors like ``.*\\.md$`` in fields such as ``file_pattern`` are valid + config values; ``string.Template.substitute`` previously raised + ``ValueError`` for these, crashing config load. The loader now uses + ``safe_substitute`` so invalid placeholders are passed through unchanged. + """ + config_directory = Path(__file__).parent / "fixtures" + config_path = config_directory / "config_with_dollar.yaml" + + config = load_config( + config_initializer=TestConfigModel, + config_path=config_path, + set_cwd=False, + ) + + assert config.name == "match_dollar_$" + assert config.nested.nested_str == ".*\\.md$" + assert config.nested_list[1].nested_str == "trailing$" + + +def test_load_config_mixed_env_var_and_literal_dollar(): + """``${VAR}`` substitution and bare ``$`` literals must coexist.""" + config_directory = Path(__file__).parent / "fixtures" + config_path = config_directory / "config_with_dollar_and_env.yaml" + env_path = config_directory / "test.env" + + cwd = Path.cwd() + config = load_config( + config_initializer=TestConfigModel, + config_path=config_path, + dot_env_path=env_path, + ) + os.chdir(cwd) + + assert config.name == "env_name" + assert config.nested.nested_str == ".*\\.md$"