Skip to content

Commit

Permalink
Merge pull request #830 from aaxelb/fix/indexing-supplement
Browse files (browse the repository at this point in the history)
[ENG-6521] fix: include supplements in indexed data
  • Loading branch information
aaxelb authored Nov 4, 2024
2 parents 8b39c69 + 83cd82e commit ec39058
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 15 deletions.
3 changes: 2 additions & 1 deletion share/search/index_strategy/trove_indexcard_flats.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def index_mappings(self):
}

def _build_sourcedoc(self, indexcard_rdf):
_rdfdoc = primitive_rdf.RdfGraph(indexcard_rdf.as_rdf_tripledict())
_rdfdoc = indexcard_rdf.as_rdfdoc_with_supplements()
if _should_skip_card(indexcard_rdf, _rdfdoc):
return None # will be deleted from the index
_nested_iris = defaultdict(set)
Expand Down Expand Up @@ -285,6 +285,7 @@ def build_elastic_actions(self, messages_chunk: messages.MessagesChunk):
.exclude(indexcard__deleted__isnull=False)
.select_related('indexcard__source_record_suid__source_config')
.prefetch_related('indexcard__focus_identifier_set')
.prefetch_related('indexcard__supplementary_rdf_set')
)
_remaining_indexcard_ids = set(messages_chunk.target_ids_chunk)
for _indexcard_rdf in _indexcard_rdf_qs:
Expand Down
4 changes: 3 additions & 1 deletion tests/trove/derive/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from unittest import mock, TestCase
import typing

from primitive_metadata import primitive_rdf as rdf

from ._inputs import DERIVER_TEST_DOCS, DeriverTestDoc


Expand Down Expand Up @@ -54,7 +56,7 @@ def _get_deriver(self, input_doc: DeriverTestDoc):
_mock_indexcard_rdf = mock.Mock()
_mock_indexcard_rdf.id = '--indexcardf-id--'
_mock_indexcard_rdf.modified = datetime.datetime(2345, 2, 2)
_mock_indexcard_rdf.as_rdf_tripledict.return_value = input_doc.tripledict
_mock_indexcard_rdf.as_rdfdoc_with_supplements.return_value = rdf.RdfGraph(input_doc.tripledict)
_mock_indexcard_rdf.focus_iri = input_doc.focus_iri
_mock_indexcard_rdf.from_raw_datum_id = '--rawdatum-id--'
_mock_indexcard_rdf.indexcard.id = '--indexcard-id--'
Expand Down
13 changes: 13 additions & 0 deletions tests/trove/digestive_tract/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ def test_extract(self):
_BLARG.like: {_BLARG.that},
},
})
self.assertEqual(_latest_rdf.as_rdfdoc_with_supplements().tripledict, {
_BLARG.this: {
rdf.RDF.type: {_BLARG.Thing},
_BLARG.like: {_BLARG.that},
},
})

def test_extract_supplementary_without_prior(self):
_cards = digestive_tract.extract(self.supplementary_raw)
Expand All @@ -91,6 +97,13 @@ def test_extract_supplementary(self):
},
})
self.assertEqual(_indexcard.latest_rdf.modified, _orig_timestamp)
self.assertEqual(_indexcard.latest_rdf.as_rdfdoc_with_supplements().tripledict, {
_BLARG.this: {
rdf.RDF.type: {_BLARG.Thing},
_BLARG.like: {_BLARG.that, _BLARG.another},
_BLARG.unlike: {_BLARG.nonthing},
},
})

def test_extract_empty_with_prior(self):
(_prior_indexcard,) = digestive_tract.extract(self.raw)
Expand Down
11 changes: 2 additions & 9 deletions trove/derive/_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import abc
from collections.abc import Iterable

from primitive_metadata import primitive_rdf

Expand All @@ -11,16 +10,10 @@ class IndexcardDeriver(abc.ABC):
focus_iri: str
data: primitive_rdf.RdfGraph

def __init__(
self,
upriver_rdf: IndexcardRdf,
supplementary_rdf_set: Iterable[IndexcardRdf] = (),
):
def __init__(self, upriver_rdf: IndexcardRdf):
self.upriver_rdf = upriver_rdf
self.focus_iri = upriver_rdf.focus_iri
self.data = primitive_rdf.RdfGraph(upriver_rdf.as_rdf_tripledict())
for _supplementary_rdf in supplementary_rdf_set:
self.data.add_tripledict(_supplementary_rdf.as_rdf_tripledict())
self.data = upriver_rdf.as_rdfdoc_with_supplements()

def q(self, pathset):
# convenience for querying self.data on self.focus_iri
Expand Down
5 changes: 1 addition & 4 deletions trove/digestive_tract.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,7 @@ def derive(indexcard: trove_db.Indexcard, deriver_iris=None):
return []
_derived_list = []
for _deriver_class in get_deriver_classes(deriver_iris):
_deriver = _deriver_class(
upriver_rdf=_latest_rdf,
supplementary_rdf_set=indexcard.supplementary_rdf_set.all(),
)
_deriver = _deriver_class(upriver_rdf=_latest_rdf)
_deriver_identifier = trove_db.ResourceIdentifier.objects.get_or_create_for_iri(_deriver.deriver_iri())
if _deriver.should_skip():
trove_db.DerivedIndexcard.objects.filter(
Expand Down
7 changes: 7 additions & 0 deletions trove/models/indexcard.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,13 @@ def as_quoted_graph(self) -> rdf.QuotedGraph:
focus_iri=self.focus_iri,
)

def as_rdfdoc_with_supplements(self) -> rdf.RdfGraph:
    '''merge this rdf with the supplementary rdfs on the same indexcard

    starts a new `rdf.RdfGraph` from `self.as_rdf_tripledict()`, then adds the
    tripledict of each item in `self.indexcard.supplementary_rdf_set`
    '''
    _combined_graph = rdf.RdfGraph(self.as_rdf_tripledict())
    _supplement_qs = self.indexcard.supplementary_rdf_set.all()
    for _each_supplement in _supplement_qs:
        _combined_graph.add_tripledict(_each_supplement.as_rdf_tripledict())
    return _combined_graph

class Meta:
abstract = True

Expand Down

0 comments on commit ec39058

Please sign in to comment.