From 00bac331afdb8f317c17ca0c5c3c7d0e858eed37 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Thu, 16 Jan 2025 08:37:33 -0800 Subject: [PATCH] [components] Update docs commenting logic --- .../libraries/dagster-dg/dagster_dg/docs.py | 105 ++++++------------ .../utils_tests/test_sample_yaml.py | 6 +- 2 files changed, 38 insertions(+), 73 deletions(-) diff --git a/python_modules/libraries/dagster-dg/dagster_dg/docs.py b/python_modules/libraries/dagster-dg/dagster_dg/docs.py index a278e05628a9e..7ad4107142f30 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg/docs.py +++ b/python_modules/libraries/dagster-dg/dagster_dg/docs.py @@ -1,24 +1,17 @@ import tempfile import webbrowser -from collections.abc import Mapping, Sequence, Set -from dataclasses import dataclass -from typing import Any, Optional, Union +from collections.abc import Iterator, Mapping, Sequence +from typing import Any, Union import markdown import yaml +from dagster._utils.source_position import SourcePositionTree +from dagster._utils.yaml_utils import parse_yaml_with_source_positions +from dagster_components.core.schema.metadata import get_required_scope from dagster_dg.component import RemoteComponentType REF_BASE = "#/$defs/" -COMMENTED_MAPPING_TAG = "!commented_mapping" - - -@dataclass(frozen=True) -class CommentedObject: - key: str - value: Union[Sequence["CommentedObject"], Any] - comment: Optional[str] - top_level: bool = False def _dereference_schema( @@ -30,38 +23,22 @@ def _dereference_schema( return subschema -def _commented_object_for_subschema( - name: str, - json_schema: Mapping[str, Any], - subschema: Mapping[str, Any], - available_scope: Optional[Set[str]] = None, -) -> Union[CommentedObject, Any]: - additional_scope = subschema.get("dagster_available_scope") - available_scope = (available_scope or set()) | set(additional_scope or []) - +def _sample_value_for_subschema( + json_schema: Mapping[str, Any], subschema: Mapping[str, Any] +) -> Any: subschema = _dereference_schema(json_schema, subschema) if "anyOf" in subschema: # TODO: handle anyOf fields more gracefully, for now just choose first option - return _commented_object_for_subschema( - name, json_schema, subschema["anyOf"][0], available_scope=available_scope - ) + return _sample_value_for_subschema(json_schema, subschema["anyOf"][0]) objtype = subschema["type"] if objtype == "object": - return CommentedObject( - key=name, - value={ - k: _commented_object_for_subschema(k, json_schema, v) - for k, v in subschema.get("properties", {}).items() - }, - comment=f"Available scope: {available_scope}" if available_scope else None, - ) + return { + k: _sample_value_for_subschema(json_schema, v) + for k, v in subschema.get("properties", {}).items() + } elif objtype == "array": - return [ - _commented_object_for_subschema( - name, json_schema, subschema["items"], available_scope=available_scope - ) - ] + return [_sample_value_for_subschema(json_schema, subschema["items"])] elif objtype == "string": return "..." elif objtype == "integer": @@ -83,44 +60,32 @@ def write_line_break(self) -> None: super().write_line_break() super().write_line_break() - def _get_tag(self) -> str: - return getattr(self.event, "tag", "") - - def expect_node(self, root=False, sequence=False, mapping=False, simple_key=False): - # for commented mappings, emit comment above the value - tag = self._get_tag() - if tag.startswith(COMMENTED_MAPPING_TAG): - self.write_indicator(f"# {tag[len(COMMENTED_MAPPING_TAG) + 1:]}", True) - self.write_line_break() - - return super().expect_node(root, sequence, mapping, simple_key) - - def process_tag(self): - tag = self._get_tag() - # ignore the mapping tag as it's handled specially - if tag.startswith(COMMENTED_MAPPING_TAG): - return - else: - super().process_tag() - -def commented_object_representer(dumper: yaml.SafeDumper, obj: CommentedObject) -> yaml.nodes.Node: - mapping = obj.value if isinstance(obj.value, dict) else {obj.key: obj.value} - - if obj.comment is not None: - return dumper.represent_mapping(f"{COMMENTED_MAPPING_TAG}|{obj.comment}", mapping) - else: - return dumper.represent_dict(mapping) - - -ComponentDumper.add_representer(CommentedObject, commented_object_representer) +def _get_source_position_comments( + valpath: Sequence[Union[str, int]], tree: SourcePositionTree, json_schema: Mapping[str, Any] +) -> Iterator[tuple[int, str]]: + available_scope = get_required_scope(valpath[1:], json_schema) + if available_scope: + yield (tree.position.start.line - 1, f"Available scope: {available_scope}") + for child_path, child_tree in tree.children.items(): + yield from _get_source_position_comments([*valpath, child_path], child_tree, json_schema) def generate_sample_yaml(component_type: str, json_schema: Mapping[str, Any]) -> str: - params_obj = _commented_object_for_subschema("params", json_schema, json_schema) - return yaml.dump( - {"type": component_type, "params": params_obj}, Dumper=ComponentDumper, sort_keys=False + raw = yaml.dump( + {"type": component_type, "params": _sample_value_for_subschema(json_schema, json_schema)}, + Dumper=ComponentDumper, + sort_keys=False, ) + parsed = parse_yaml_with_source_positions(raw) + comments = dict(_get_source_position_comments([], parsed.source_position_tree, json_schema)) + commented_lines = [] + for line_num, line in enumerate(raw.split("\n")): + if line_num in comments: + commented_lines.append(f"{line} # {comments[line_num]}") + else: + commented_lines.append(line) + return "\n".join(commented_lines) def render_markdown_in_browser(markdown_content: str) -> None: diff --git a/python_modules/libraries/dagster-dg/dagster_dg_tests/utils_tests/test_sample_yaml.py b/python_modules/libraries/dagster-dg/dagster_dg_tests/utils_tests/test_sample_yaml.py index 7f5eced381cf8..1246970c75186 100644 --- a/python_modules/libraries/dagster-dg/dagster_dg_tests/utils_tests/test_sample_yaml.py +++ b/python_modules/libraries/dagster-dg/dagster_dg_tests/utils_tests/test_sample_yaml.py @@ -11,7 +11,7 @@ class SampleSubSchema(ComponentSchemaBaseModel): class SampleSchema(ComponentSchemaBaseModel): - sub_scoped: Annotated[SampleSubSchema, ResolvableFieldInfo(additional_scope={"outer_scope"})] + sub_scoped: Annotated[SampleSubSchema, ResolvableFieldInfo(required_scope={"outer_scope"})] sub_optional: SampleSubSchema sub_list: Sequence[SampleSubSchema] @@ -26,8 +26,8 @@ def test_generate_sample_yaml(): params: sub_scoped: # Available scope: {'outer_scope'} - str_field: '...' - int_field: 0 + str_field: '...' # Available scope: {'outer_scope'} + int_field: 0 # Available scope: {'outer_scope'} sub_optional: str_field: '...' int_field: 0