Skip to content

Commit

Permalink
Expand/resource (#39)
Browse files Browse the repository at this point in the history
* Expand out resource to all resource edges in graph

* fix pytests

* add Group and SubstanceDefinition (#40)

---------

Co-authored-by: Quinn Wai Wong <[email protected]>
  • Loading branch information
matthewpeterkort and quinnwai authored Dec 4, 2024
1 parent e50dfb8 commit 9aa79a0
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 22 deletions.
6 changes: 4 additions & 2 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,23 @@ dependency_order:
#
- Organization
# - Location
- Group
- Practitioner
- PractitionerRole
- ResearchStudy
- Patient
- ResearchSubject
- Substance
- SubstanceDefinition
- Specimen
# - Encounter
- Observation
- DiagnosticReport
- Condition
- Medication
- MedicationAdministration
# - MedicationStatement
# - MedicationRequest
- MedicationStatement
- MedicationRequest
- Procedure
- DocumentReference
- Task
Expand Down
3 changes: 2 additions & 1 deletion iceberg_tools/cli/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def generate_bmeg(output_path, config_path, stats):
with SchemaLinkWriter() as mgr:
for klass, schema in schemas.items():
with open(output_path / pathlib.Path(klass + ".yaml"), "w") as fp:
schema = mgr.insert_links(schema, classes)
dependency_order = gen3_config["dependency_order"]
schema = mgr.insert_links(schema, classes, dependency_order)
yaml.dump(schema, fp)
logger.info(f"Individual yaml schemas written to {output_path}/*.yaml")

Expand Down
26 changes: 15 additions & 11 deletions iceberg_tools/graph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def _extract_target_hints(schema_link):
return directionality, multiplicity, association


def _generate_links_from_fhir_references(schema, classes) -> List[dict]:
def _generate_links_from_fhir_references(schema, classes, dependency_order) -> List[dict]:
"""Generate links for a schema.
Parameters
Expand All @@ -79,14 +79,15 @@ def _generate_links_from_fhir_references(schema, classes) -> List[dict]:

# Direct links from schema['title']
links = []
links.extend(_extract_links(schema, classes))
dependency_order = [elem for elem in dependency_order if elem not in ["_definitions.yaml", "_terms.yaml", "Program", "Project"]]
links.extend(_extract_links(schema, classes, dependency_order))

# Nested links
nested_links = []
for nested_schema, path in _extract_nested_schemas(schema):
if nested_schema['title'] in NESTED_OBJECTS_IGNORE:
continue
extracted_links = _extract_links(nested_schema, classes)
extracted_links = _extract_links(nested_schema, classes, dependency_order)
if len(extracted_links) == 0:
continue

Expand Down Expand Up @@ -137,11 +138,12 @@ def _extract_nested_schemas(schema) -> Iterator[tuple[dict, str]]:
yield sub_schema, match


def _extract_links(schema: dict, classes) -> List[dict]:
def _extract_links(schema: dict, classes, dependency_order) -> List[dict]:
"""Extract Link Description Object (LDO) from a schema.
see https://json-schema.org/draft/2019-09/json-schema-hypermedia.html#rfc.section.6
"""

refs_finder = RefFinder(schema, refs=True)
matches = sorted(refs_finder.find_refs())

Expand All @@ -155,8 +157,10 @@ def _extract_links(schema: dict, classes) -> List[dict]:
multiplicity = 'has_many'
property_name = match.split('.')[1]
property_ = schema['properties'][property_name]
if 'enum_reference_types' not in property_:
property_['enum_reference_types'] = ['__ANY__']

if 'enum_reference_types' not in property_ or\
(len(property_["enum_reference_types"]) == 1 and property_["enum_reference_types"][0] == "Resource"):
property_['enum_reference_types'] = dependency_order
append_postscript = len(property_['enum_reference_types']) > 1
_path = '.'.join(match.split('.')[1:-1])
_path = _path + '.reference'
Expand Down Expand Up @@ -394,7 +398,7 @@ def _extract_link_parts(self, schema_link: dict, instance: dict) -> dict:
class VertexSchemaDecorator:
"""Adds links to vertex schema."""

def __init__(self, schema: dict, classes: list):
def __init__(self, schema: dict, classes: list, dependency_order: list):
"""Load and compile a JSON schema."""
self.schema = _load_schema(schema)
# add links property
Expand All @@ -407,7 +411,7 @@ def __init__(self, schema: dict, classes: list):
}
}
# add links element
links, nested_links = _generate_links_from_fhir_references(schema, classes)
links, nested_links = _generate_links_from_fhir_references(schema, classes, dependency_order)
self.schema['links'] = links + nested_links
# check schema
jsonschema.Draft202012Validator.check_schema(schema) # Draft202012Validator.check_schema(schema)
Expand Down Expand Up @@ -471,7 +475,6 @@ def insert_links(self, vertex: dict) -> dict:
return vertex

for schema_link in _schema['links']:

keys = self._extract_href_keys(schema_link['href'])

values = self._extract_values(schema_link, vertex)
Expand Down Expand Up @@ -606,7 +609,7 @@ def __exit__(self, exc_type, exc_value, exc_tb):
pass

@staticmethod
def insert_links(schema, classes) -> dict:
def insert_links(schema, classes, dependency_order) -> dict:
"""Insert links into a schema.
Parameters:
Expand All @@ -616,8 +619,9 @@ def insert_links(schema, classes) -> dict:
dict: schema with links inserted
"""

schema = _load_schema(schema)
links, nested_links = _generate_links_from_fhir_references(schema, classes)
links, nested_links = _generate_links_from_fhir_references(schema, classes, dependency_order)
schema['links'] = links + nested_links
schema['properties']['links'] = {
'type': 'array',
Expand Down
29 changes: 29 additions & 0 deletions tests/unit/link-description-object/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@ def python_source_directories() -> List[str]:
return ["tools", "tests"]


# Ordered resource type names that the tests pass to the link-generation
# code as its ``dependency_order`` argument.
_DEPENDENCY_LIST = [
    "Organization", "Practitioner", "PractitionerRole",
    "ResearchStudy", "Patient", "ResearchSubject",
    "Substance", "Specimen", "Observation",
    "DiagnosticReport", "Condition", "Medication",
    "MedicationAdministration", "MedicationStatement", "MedicationRequest",
    "Procedure", "DocumentReference", "Task",
    "ImagingStudy", "FamilyMemberHistory", "BodyStructure",
]

_SCHEMA = yaml.safe_load("""
---
"$schema": https://json-schema.org/draft/2020-12/schema
Expand Down Expand Up @@ -162,6 +186,11 @@ def python_source_directories() -> List[str]:
""")


@pytest.fixture
def dependency_list():
    """Provide the resource type names used as ``dependency_order`` in tests.

    Returns:
        list: a fresh copy of the module-level ``_DEPENDENCY_LIST``.
    """
    # Hand out a copy: the fixture is function-scoped, so each test gets its
    # own list and an in-place mutation by one test (or by the code under
    # test) cannot leak into the shared module-level constant.
    return list(_DEPENDENCY_LIST)


@pytest.fixture
def nested_references():
return _NESTED_REFERENCES['references']
Expand Down
16 changes: 8 additions & 8 deletions tests/unit/link-description-object/test_vertex_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,20 @@
EXPECTED_LINKS = ['collection_collector_Patient', 'note_authorReference_Patient', 'parent', 'subject_Patient']


def test_specimen_schema_decorator():
def test_specimen_schema_decorator(dependency_list):
"""Ensure links are discovered from properties."""
schemas = extract_schemas([Specimen, Patient], BASE_URI)
specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient])
specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient], dependency_list)
assert len(specimen_schema.schema['links']) == 4, ("Specimen should have 4 links", yaml.dump(specimen_schema.schema, sort_keys=False))
actual_links = sorted([_['rel'] for _ in specimen_schema.schema['links']])
print(sorted(actual_links))
assert actual_links == EXPECTED_LINKS, ("Specimen links should match", actual_links, EXPECTED_LINKS)


def test_vertex_link_writer_polymorphic():
def test_vertex_link_writer_polymorphic(dependency_list):
"""Ensure links are discovered from properties. Use a context manager for throughput."""
schemas = extract_schemas([Specimen], BASE_URI)
specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient, Device, Group])
specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient, Device, Group], dependency_list)
with VertexLinkWriter(specimen_schema) as mgr:
for specimen in [
{'id': 's-p1', 'resourceType': 'Specimen', 'subject': {'reference': 'Patient/p1'}},
Expand All @@ -42,11 +42,11 @@ def test_vertex_link_writer_polymorphic():
assert specimen['links'][0] == {'rel': f'subject_{entity_type}', 'href': ref}, "Links should be added to specimen"


def test_vertex_link_writer_nested():
def test_vertex_link_writer_nested(dependency_list):
"""Ensure links are discovered from properties. Use a context manager for throughput."""
schemas = extract_schemas([Specimen], BASE_URI)

specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient, Substance])
specimen_schema = VertexSchemaDecorator(schemas['Specimen'], [Specimen, Patient, Substance], dependency_list)

with VertexLinkWriter(specimen_schema) as mgr:

Expand Down Expand Up @@ -80,13 +80,13 @@ def test_vertex_link_writer_nested():
assert specimen['links'][i] == {'rel': 'processing_additive', 'href': specimen['processing'][i]['additive'][i]['reference']}, "Links should be added to specimen"


def test_schema_link_writer_nested():
def test_schema_link_writer_nested(dependency_list):
"""Ensure links are discovered from schema. Use a context manager for throughput."""
schemas = extract_schemas([Specimen], BASE_URI)
specimen_schema = schemas['Specimen']

with SchemaLinkWriter() as mgr:
specimen_schema = mgr.insert_links(specimen_schema, [Specimen, Patient])
specimen_schema = mgr.insert_links(specimen_schema, [Specimen, Patient], dependency_list)
assert specimen_schema['links'] is not None, "Links should be added to specimen"
assert specimen_schema['properties']['links'] is not None, "Links should be added to specimen properties"
assert 'links' not in specimen_schema['properties']['links'], "Links should not be double nested"

0 comments on commit 9aa79a0

Please sign in to comment.