From 5b5fc9fdf8e793b4444c9f04c16eb71e023ecd37 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Mon, 27 Jan 2025 11:52:48 -0500 Subject: [PATCH] fix: improve hacks around gathering --- trove/trovesearch/trovesearch_gathering.py | 45 ++++++++++++++++++---- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/trove/trovesearch/trovesearch_gathering.py b/trove/trovesearch/trovesearch_gathering.py index 448efc859..0ceed3ccb 100644 --- a/trove/trovesearch/trovesearch_gathering.py +++ b/trove/trovesearch/trovesearch_gathering.py @@ -1,7 +1,7 @@ import dataclasses import logging import urllib.parse -from typing import ClassVar, Any +from typing import ClassVar, Any, Iterator, Iterable from primitive_metadata.primitive_rdf import ( Literal, @@ -179,7 +179,15 @@ def gather_cardsearch_page(focus: CardsearchFocus, *, deriver_iri, **kwargs): # (what with all these intermediate blank nodes and sequences): # yield trove:resourceMetadata here (instead of another gatherer) _card_focus = _card_foci[_result.card_iri] - yield (_result.card_iri, TROVE.resourceMetadata, _card_focus.resourceMetadata) + _card_twoples = _minimal_indexcard_twoples( + focus_identifiers=[ + _identifier.as_iri() + for _identifier in _card_focus.indexcard.focus_identifier_set.all() + ], + resource_metadata=_card_focus.resourceMetadata, + ) + for _pred, _obj in _card_twoples: + yield (_result.card_iri, _pred, _obj) yield (TROVE.searchResultPage, sequence(_result_page)) _current_handle = _current_handle.get_next_streaming_handle() @@ -246,7 +254,15 @@ def gather_valuesearch_page(focus: ValuesearchFocus, *, deriver_iri, **kwargs): # hack around (current) limitations of primitive_metadata.gather # (what with all these intermediate blank nodes and sequences): # yield trove:resourceMetadata here (instead of another gatherer) - yield (_indexcard_obj, TROVE.resourceMetadata, _card_focus.resourceMetadata) + _card_twoples = _minimal_indexcard_twoples( + focus_identifiers=[ + _identifier.as_iri() + for _identifier in _card_focus.indexcard.focus_identifier_set.all() + ], + resource_metadata=_card_focus.resourceMetadata, + ) + for _pred, _obj in _card_twoples: + yield (_indexcard_obj, _pred, _obj) if _indexcard_obj is None: # no actual indexcard; put what we know in a blanknode-indexcard _indexcard_obj = _valuesearch_result_as_indexcard_blanknode(_result) @@ -443,12 +459,25 @@ def _valuesearch_result_as_json(result: ValuesearchResult) -> Literal: ) +def _minimal_indexcard_twoples( + focus_identifiers: Iterable[str], + resource_metadata: rdf.Literal, +) -> Iterator[rdf.RdfTwople]: + yield (RDF.type, TROVE.Indexcard) + for _identifier in focus_identifiers: + yield (TROVE.focusIdentifier, ( + _identifier + if isinstance(_identifier, rdf.Literal) + else literal(_identifier) + )) + yield (TROVE.resourceMetadata, resource_metadata) + + def _valuesearch_result_as_indexcard_blanknode(result: ValuesearchResult) -> frozenset: - return blanknode({ - RDF.type: {TROVE.Indexcard}, - TROVE.focusIdentifier: {literal(result.value_iri or result.value_value)}, - TROVE.resourceMetadata: {_valuesearch_result_as_json(result)}, - }) + return frozenset(_minimal_indexcard_twoples( + focus_identifiers=[literal(result.value_iri or result.value_value)], + resource_metadata=_valuesearch_result_as_json(result), + )) def _osfmap_json(tripledict, focus_iri):