From 83cd82eaa64b9c8ee4f2e86441188b53605194d3 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Mon, 4 Nov 2024 10:00:29 -0500 Subject: [PATCH] fix: include supplements in indexed data --- .../search/index_strategy/trove_indexcard_flats.py | 3 ++- tests/trove/derive/_base.py | 4 +++- tests/trove/digestive_tract/test_extract.py | 13 +++++++++++++ trove/derive/_base.py | 11 ++--------- trove/digestive_tract.py | 5 +---- trove/models/indexcard.py | 7 +++++++ 6 files changed, 28 insertions(+), 15 deletions(-) diff --git a/share/search/index_strategy/trove_indexcard_flats.py b/share/search/index_strategy/trove_indexcard_flats.py index 610591e1a..e2b879d24 100644 --- a/share/search/index_strategy/trove_indexcard_flats.py +++ b/share/search/index_strategy/trove_indexcard_flats.py @@ -167,7 +167,7 @@ def index_mappings(self): } def _build_sourcedoc(self, indexcard_rdf): - _rdfdoc = primitive_rdf.RdfGraph(indexcard_rdf.as_rdf_tripledict()) + _rdfdoc = indexcard_rdf.as_rdfdoc_with_supplements() if _should_skip_card(indexcard_rdf, _rdfdoc): return None # will be deleted from the index _nested_iris = defaultdict(set) @@ -285,6 +285,7 @@ def build_elastic_actions(self, messages_chunk: messages.MessagesChunk): .exclude(indexcard__deleted__isnull=False) .select_related('indexcard__source_record_suid__source_config') .prefetch_related('indexcard__focus_identifier_set') + .prefetch_related('indexcard__supplementary_rdf_set') ) _remaining_indexcard_ids = set(messages_chunk.target_ids_chunk) for _indexcard_rdf in _indexcard_rdf_qs: diff --git a/tests/trove/derive/_base.py b/tests/trove/derive/_base.py index 40cdebcb9..b15a6d20b 100644 --- a/tests/trove/derive/_base.py +++ b/tests/trove/derive/_base.py @@ -2,6 +2,8 @@ from unittest import mock, TestCase import typing +from primitive_metadata import primitive_rdf as rdf + from ._inputs import DERIVER_TEST_DOCS, DeriverTestDoc @@ -54,7 +56,7 @@ def _get_deriver(self, input_doc: DeriverTestDoc): _mock_indexcard_rdf = mock.Mock() _mock_indexcard_rdf.id = '--indexcardf-id--' _mock_indexcard_rdf.modified = datetime.datetime(2345, 2, 2) - _mock_indexcard_rdf.as_rdf_tripledict.return_value = input_doc.tripledict + _mock_indexcard_rdf.as_rdfdoc_with_supplements.return_value = rdf.RdfGraph(input_doc.tripledict) _mock_indexcard_rdf.focus_iri = input_doc.focus_iri _mock_indexcard_rdf.from_raw_datum_id = '--rawdatum-id--' _mock_indexcard_rdf.indexcard.id = '--indexcard-id--' diff --git a/tests/trove/digestive_tract/test_extract.py b/tests/trove/digestive_tract/test_extract.py index 64f975e34..5a87e8099 100644 --- a/tests/trove/digestive_tract/test_extract.py +++ b/tests/trove/digestive_tract/test_extract.py @@ -65,6 +65,12 @@ def test_extract(self): _BLARG.like: {_BLARG.that}, }, }) + self.assertEqual(_latest_rdf.as_rdfdoc_with_supplements().tripledict, { + _BLARG.this: { + rdf.RDF.type: {_BLARG.Thing}, + _BLARG.like: {_BLARG.that}, + }, + }) def test_extract_supplementary_without_prior(self): _cards = digestive_tract.extract(self.supplementary_raw) @@ -91,6 +97,13 @@ def test_extract_supplementary(self): }, }) self.assertEqual(_indexcard.latest_rdf.modified, _orig_timestamp) + self.assertEqual(_indexcard.latest_rdf.as_rdfdoc_with_supplements().tripledict, { + _BLARG.this: { + rdf.RDF.type: {_BLARG.Thing}, + _BLARG.like: {_BLARG.that, _BLARG.another}, + _BLARG.unlike: {_BLARG.nonthing}, + }, + }) def test_extract_empty_with_prior(self): (_prior_indexcard,) = digestive_tract.extract(self.raw) diff --git a/trove/derive/_base.py b/trove/derive/_base.py index a16dc8fe0..9909e8f19 100644 --- a/trove/derive/_base.py +++ b/trove/derive/_base.py @@ -1,5 +1,4 @@ import abc -from collections.abc import Iterable from primitive_metadata import primitive_rdf @@ -11,16 +10,10 @@ class IndexcardDeriver(abc.ABC): focus_iri: str data: primitive_rdf.RdfGraph - def __init__( - self, - upriver_rdf: IndexcardRdf, - supplementary_rdf_set: Iterable[IndexcardRdf] = (), - ): + def __init__(self, upriver_rdf: IndexcardRdf): self.upriver_rdf = upriver_rdf self.focus_iri = upriver_rdf.focus_iri - self.data = primitive_rdf.RdfGraph(upriver_rdf.as_rdf_tripledict()) - for _supplementary_rdf in supplementary_rdf_set: - self.data.add_tripledict(_supplementary_rdf.as_rdf_tripledict()) + self.data = upriver_rdf.as_rdfdoc_with_supplements() def q(self, pathset): # convenience for querying self.data on self.focus_iri diff --git a/trove/digestive_tract.py b/trove/digestive_tract.py index 9be767642..2a95fb056 100644 --- a/trove/digestive_tract.py +++ b/trove/digestive_tract.py @@ -176,10 +176,7 @@ def derive(indexcard: trove_db.Indexcard, deriver_iris=None): return [] _derived_list = [] for _deriver_class in get_deriver_classes(deriver_iris): - _deriver = _deriver_class( - upriver_rdf=_latest_rdf, - supplementary_rdf_set=indexcard.supplementary_rdf_set.all(), - ) + _deriver = _deriver_class(upriver_rdf=_latest_rdf) _deriver_identifier = trove_db.ResourceIdentifier.objects.get_or_create_for_iri(_deriver.deriver_iri()) if _deriver.should_skip(): trove_db.DerivedIndexcard.objects.filter( diff --git a/trove/models/indexcard.py b/trove/models/indexcard.py index d6e46c7c7..5d4ca9441 100644 --- a/trove/models/indexcard.py +++ b/trove/models/indexcard.py @@ -313,6 +313,13 @@ def as_quoted_graph(self) -> rdf.QuotedGraph: focus_iri=self.focus_iri, ) + def as_rdfdoc_with_supplements(self) -> rdf.RdfGraph: + '''build an rdf graph composed of this rdf and all current card supplements''' + _rdfdoc = rdf.RdfGraph(self.as_rdf_tripledict()) + for _supplementary_rdf in self.indexcard.supplementary_rdf_set.all(): + _rdfdoc.add_tripledict(_supplementary_rdf.as_rdf_tripledict()) + return _rdfdoc + class Meta: abstract = True