Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[components] Update docs commenting logic #27162

Merged
merged 1 commit into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 88 additions & 68 deletions python_modules/libraries/dagster-dg/dagster_dg/docs.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,72 @@
import tempfile
import webbrowser
from collections.abc import Mapping, Sequence, Set
from dataclasses import dataclass
from typing import Any, Optional, Union
from collections.abc import Iterator, Mapping, Sequence, Set
from typing import Any, Union

import markdown
import yaml
from dagster._utils.source_position import SourcePositionTree
from dagster._utils.yaml_utils import parse_yaml_with_source_positions

from dagster_dg.component import RemoteComponentType

REF_BASE = "#/$defs/"
COMMENTED_MAPPING_TAG = "!commented_mapping"
JSON_SCHEMA_EXTRA_REQUIRED_SCOPE_KEY = "dagster_required_scope"


@dataclass(frozen=True)
class CommentedObject:
key: str
value: Union[Sequence["CommentedObject"], Any]
comment: Optional[str]
top_level: bool = False
def _subschemas_on_path(
valpath: Sequence[Union[str, int]], json_schema: Mapping[str, Any], subschema: Mapping[str, Any]
) -> Iterator[Mapping[str, Any]]:
"""Given a valpath and the json schema of a given target type, returns the subschemas at each step of the path."""
# List[ComplexType] (e.g.) will contain a reference to the complex type schema in the
# top-level $defs, so we dereference it here.
if "$ref" in subschema:
# depending on the pydantic version, the extras may be stored with the reference or not
extras = {k: v for k, v in subschema.items() if k != "$ref"}
subschema = {**json_schema["$defs"].get(subschema["$ref"][len(REF_BASE) :]), **extras}

yield subschema
if len(valpath) == 0:
return

# Optional[ComplexType] (e.g.) will contain multiple schemas in the "anyOf" field
if "anyOf" in subschema:
for inner in subschema["anyOf"]:
yield from _subschemas_on_path(valpath, json_schema, inner)

el = valpath[0]
if isinstance(el, str):
# valpath: ['field']
# field: X
inner = subschema.get("properties", {}).get(el)
elif isinstance(el, int):
# valpath: ['field', 0]
# field: List[X]
inner = subschema.get("items")
else:
raise ValueError(f"Invalid valpath element: {el}")

# the path wasn't valid, or unspecified
if not inner:
return

_, *rest = valpath
yield from _subschemas_on_path(rest, json_schema, inner)
Comment on lines +37 to +54
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There appears to be a bug in the path traversal logic. The current code extracts valpath[0] before slicing off the first element with _, *rest = valpath, which means the same element is processed twice. Moving the _, *rest = valpath line before the el = valpath[0] check and using el = first would fix this issue and ensure correct path traversal.

Example fix:

first, *rest = valpath
el = first

Spotted by Graphite Reviewer

Is this helpful? React 👍 or 👎 to let us know.



def _get_additional_required_scope(subschema: Mapping[str, Any]) -> Set[str]:
raw = subschema.get(JSON_SCHEMA_EXTRA_REQUIRED_SCOPE_KEY)
return set(raw) if raw else set()


def get_required_scope(
valpath: Sequence[Union[str, int]], json_schema: Mapping[str, Any]
) -> Set[str]:
"""Given a valpath and the json schema of a given target type, determines the available rendering scope."""
required_scope = set()
for subschema in _subschemas_on_path(valpath, json_schema, json_schema):
required_scope |= _get_additional_required_scope(subschema)
return required_scope


def _dereference_schema(
Expand All @@ -30,38 +78,22 @@ def _dereference_schema(
return subschema


def _commented_object_for_subschema(
name: str,
json_schema: Mapping[str, Any],
subschema: Mapping[str, Any],
required_scope: Optional[Set[str]] = None,
) -> Union[CommentedObject, Any]:
additional_scope = subschema.get("dagster_required_scope")
required_scope = (required_scope or set()) | set(additional_scope or [])

def _sample_value_for_subschema(
json_schema: Mapping[str, Any], subschema: Mapping[str, Any]
) -> Any:
subschema = _dereference_schema(json_schema, subschema)
if "anyOf" in subschema:
# TODO: handle anyOf fields more gracefully, for now just choose first option
return _commented_object_for_subschema(
name, json_schema, subschema["anyOf"][0], required_scope=required_scope
)
return _sample_value_for_subschema(json_schema, subschema["anyOf"][0])

objtype = subschema["type"]
if objtype == "object":
return CommentedObject(
key=name,
value={
k: _commented_object_for_subschema(k, json_schema, v)
for k, v in subschema.get("properties", {}).items()
},
comment=f"Available scope: {required_scope}" if required_scope else None,
)
return {
k: _sample_value_for_subschema(json_schema, v)
for k, v in subschema.get("properties", {}).items()
}
elif objtype == "array":
return [
_commented_object_for_subschema(
name, json_schema, subschema["items"], required_scope=required_scope
)
]
return [_sample_value_for_subschema(json_schema, subschema["items"])]
elif objtype == "string":
return "..."
elif objtype == "integer":
Expand All @@ -83,44 +115,32 @@ def write_line_break(self) -> None:
super().write_line_break()
super().write_line_break()

def _get_tag(self) -> str:
return getattr(self.event, "tag", "")

def expect_node(self, root=False, sequence=False, mapping=False, simple_key=False):
# for commented mappings, emit comment above the value
tag = self._get_tag()
if tag.startswith(COMMENTED_MAPPING_TAG):
self.write_indicator(f"# {tag[len(COMMENTED_MAPPING_TAG) + 1:]}", True)
self.write_line_break()

return super().expect_node(root, sequence, mapping, simple_key)

def process_tag(self):
tag = self._get_tag()
# ignore the mapping tag as it's handled specially
if tag.startswith(COMMENTED_MAPPING_TAG):
return
else:
super().process_tag()


def commented_object_representer(dumper: yaml.SafeDumper, obj: CommentedObject) -> yaml.nodes.Node:
mapping = obj.value if isinstance(obj.value, dict) else {obj.key: obj.value}

if obj.comment is not None:
return dumper.represent_mapping(f"{COMMENTED_MAPPING_TAG}|{obj.comment}", mapping)
else:
return dumper.represent_dict(mapping)


ComponentDumper.add_representer(CommentedObject, commented_object_representer)
def _get_source_position_comments(
valpath: Sequence[Union[str, int]], tree: SourcePositionTree, json_schema: Mapping[str, Any]
) -> Iterator[tuple[int, str]]:
available_scope = get_required_scope(valpath[1:], json_schema)
if available_scope:
yield (tree.position.start.line - 1, f"Available scope: {available_scope}")
for child_path, child_tree in tree.children.items():
yield from _get_source_position_comments([*valpath, child_path], child_tree, json_schema)


def generate_sample_yaml(component_type: str, json_schema: Mapping[str, Any]) -> str:
params_obj = _commented_object_for_subschema("params", json_schema, json_schema)
return yaml.dump(
{"type": component_type, "params": params_obj}, Dumper=ComponentDumper, sort_keys=False
raw = yaml.dump(
{"type": component_type, "params": _sample_value_for_subschema(json_schema, json_schema)},
Dumper=ComponentDumper,
sort_keys=False,
)
parsed = parse_yaml_with_source_positions(raw)
comments = dict(_get_source_position_comments([], parsed.source_position_tree, json_schema))
commented_lines = []
for line_num, line in enumerate(raw.split("\n")):
if line_num in comments:
commented_lines.append(f"{line} # {comments[line_num]}")
else:
commented_lines.append(line)
return "\n".join(commented_lines)


def render_markdown_in_browser(markdown_content: str) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class SampleSubSchema(ComponentSchemaBaseModel):


class SampleSchema(ComponentSchemaBaseModel):
sub_scoped: Annotated[SampleSubSchema, ResolvableFieldInfo(additional_scope={"outer_scope"})]
sub_scoped: Annotated[SampleSubSchema, ResolvableFieldInfo(required_scope={"outer_scope"})]
sub_optional: SampleSubSchema
sub_list: Sequence[SampleSubSchema]

Expand All @@ -26,8 +26,8 @@ def test_generate_sample_yaml():

params:
sub_scoped: # Available scope: {'outer_scope'}
str_field: '...'
int_field: 0
str_field: '...' # Available scope: {'outer_scope'}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it was actually a bug that these comments weren't showing up before

in terms of desired behavior, the previous output might actually have been nicer but now it's much easier to choose which way we want this to appear

int_field: 0 # Available scope: {'outer_scope'}
sub_optional:
str_field: '...'
int_field: 0
Expand Down
Loading