Skip to content

Commit

Permalink
[components] Update docs commenting logic
Browse files Browse the repository at this point in the history
  • Loading branch information
OwenKephart committed Jan 16, 2025
1 parent 6bd100f commit aeb1eec
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 73 deletions.
105 changes: 35 additions & 70 deletions python_modules/libraries/dagster-dg/dagster_dg/docs.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,17 @@
import tempfile
import webbrowser
from collections.abc import Mapping, Sequence, Set
from dataclasses import dataclass
from typing import Any, Optional, Union
from collections.abc import Iterator, Mapping, Sequence
from typing import Any, Union

import markdown
import yaml
from dagster._utils.source_position import SourcePositionTree
from dagster._utils.yaml_utils import parse_yaml_with_source_positions
from dagster_components.core.schema.metadata import get_required_scope

from dagster_dg.component import RemoteComponentType

REF_BASE = "#/$defs/"
COMMENTED_MAPPING_TAG = "!commented_mapping"


@dataclass(frozen=True)
class CommentedObject:
key: str
value: Union[Sequence["CommentedObject"], Any]
comment: Optional[str]
top_level: bool = False


def _dereference_schema(
Expand All @@ -30,38 +23,22 @@ def _dereference_schema(
return subschema


def _commented_object_for_subschema(
name: str,
json_schema: Mapping[str, Any],
subschema: Mapping[str, Any],
available_scope: Optional[Set[str]] = None,
) -> Union[CommentedObject, Any]:
additional_scope = subschema.get("dagster_available_scope")
available_scope = (available_scope or set()) | set(additional_scope or [])

def _sample_value_for_subschema(
json_schema: Mapping[str, Any], subschema: Mapping[str, Any]
) -> Any:
subschema = _dereference_schema(json_schema, subschema)
if "anyOf" in subschema:
# TODO: handle anyOf fields more gracefully, for now just choose first option
return _commented_object_for_subschema(
name, json_schema, subschema["anyOf"][0], available_scope=available_scope
)
return _sample_value_for_subschema(json_schema, subschema["anyOf"][0])

objtype = subschema["type"]
if objtype == "object":
return CommentedObject(
key=name,
value={
k: _commented_object_for_subschema(k, json_schema, v)
for k, v in subschema.get("properties", {}).items()
},
comment=f"Available scope: {available_scope}" if available_scope else None,
)
return {
k: _sample_value_for_subschema(json_schema, v)
for k, v in subschema.get("properties", {}).items()
}
elif objtype == "array":
return [
_commented_object_for_subschema(
name, json_schema, subschema["items"], available_scope=available_scope
)
]
return [_sample_value_for_subschema(json_schema, subschema["items"])]
elif objtype == "string":
return "..."
elif objtype == "integer":
Expand All @@ -83,44 +60,32 @@ def write_line_break(self) -> None:
super().write_line_break()
super().write_line_break()

def _get_tag(self) -> str:
return getattr(self.event, "tag", "")

def expect_node(self, root=False, sequence=False, mapping=False, simple_key=False):
# for commented mappings, emit comment above the value
tag = self._get_tag()
if tag.startswith(COMMENTED_MAPPING_TAG):
self.write_indicator(f"# {tag[len(COMMENTED_MAPPING_TAG) + 1:]}", True)
self.write_line_break()

return super().expect_node(root, sequence, mapping, simple_key)

def process_tag(self):
tag = self._get_tag()
# ignore the mapping tag as it's handled specially
if tag.startswith(COMMENTED_MAPPING_TAG):
return
else:
super().process_tag()


def commented_object_representer(dumper: yaml.SafeDumper, obj: CommentedObject) -> yaml.nodes.Node:
mapping = obj.value if isinstance(obj.value, dict) else {obj.key: obj.value}

if obj.comment is not None:
return dumper.represent_mapping(f"{COMMENTED_MAPPING_TAG}|{obj.comment}", mapping)
else:
return dumper.represent_dict(mapping)


ComponentDumper.add_representer(CommentedObject, commented_object_representer)
def _get_source_position_comments(
valpath: Sequence[Union[str, int]], tree: SourcePositionTree, json_schema: Mapping[str, Any]
) -> Iterator[tuple[int, str]]:
available_scope = get_required_scope(valpath[1:], json_schema)
if available_scope:
yield (tree.position.start.line - 1, f"Available scope: {available_scope}")
for child_path, child_tree in tree.children.items():
yield from _get_source_position_comments([*valpath, child_path], child_tree, json_schema)


def generate_sample_yaml(component_type: str, json_schema: Mapping[str, Any]) -> str:
params_obj = _commented_object_for_subschema("params", json_schema, json_schema)
return yaml.dump(
{"type": component_type, "params": params_obj}, Dumper=ComponentDumper, sort_keys=False
raw = yaml.dump(
{"type": component_type, "params": _sample_value_for_subschema(json_schema, json_schema)},
Dumper=ComponentDumper,
sort_keys=False,
)
parsed = parse_yaml_with_source_positions(raw)
comments = dict(_get_source_position_comments([], parsed.source_position_tree, json_schema))
commented_lines = []
for line_num, line in enumerate(raw.split("\n")):
if line_num in comments:
commented_lines.append(f"{line} # {comments[line_num]}")
else:
commented_lines.append(line)
return "\n".join(commented_lines)


def render_markdown_in_browser(markdown_content: str) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class SampleSubSchema(ComponentSchemaBaseModel):


class SampleSchema(ComponentSchemaBaseModel):
sub_scoped: Annotated[SampleSubSchema, ResolvableFieldInfo(additional_scope={"outer_scope"})]
sub_scoped: Annotated[SampleSubSchema, ResolvableFieldInfo(required_scope={"outer_scope"})]
sub_optional: SampleSubSchema
sub_list: Sequence[SampleSubSchema]

Expand All @@ -26,8 +26,8 @@ def test_generate_sample_yaml():
params:
sub_scoped: # Available scope: {'outer_scope'}
str_field: '...'
int_field: 0
str_field: '...' # Available scope: {'outer_scope'}
int_field: 0 # Available scope: {'outer_scope'}
sub_optional:
str_field: '...'
int_field: 0
Expand Down

0 comments on commit aeb1eec

Please sign in to comment.