Skip to content

Commit

Permalink
fix: omit irrelevant search-result evidence/highlights
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Feb 21, 2025
1 parent c09e4f4 commit 27c05fa
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 8 deletions.
20 changes: 12 additions & 8 deletions share/search/index_strategy/trovesearch_denorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ def _cardsearch_handle(
_results.append(CardsearchResult(
card_iri=_card_iri,
card_pk=_es8_hit['_id'],
text_match_evidence=list(self._gather_textmatch_evidence(_card_iri, _es8_hit)),
text_match_evidence=list(self._gather_textmatch_evidence(_card_iri, _es8_hit, cardsearch_params)),
))
_relatedproperty_list: list[PropertypathUsage] = []
if cardsearch_params.related_property_paths:
Expand All @@ -585,16 +585,20 @@ def _cardsearch_handle(
search_params=cardsearch_params,
)

def _gather_textmatch_evidence(self, card_iri, es8_hit) -> Iterator[TextMatchEvidence]:
def _gather_textmatch_evidence(self, card_iri, es8_hit, cardsearch_params) -> Iterator[TextMatchEvidence]:
for _field, _snippets in es8_hit.get('highlight', {}).items():
(_, _, _encoded_path) = _field.rpartition('.')
_property_path = _parse_path_field_name(_encoded_path)
for _snippet in _snippets:
yield TextMatchEvidence(
property_path=_property_path,
matching_highlight=rdf.literal(_snippet),
card_iri=card_iri,
)
if ( # skip highlights on non-requested text fields
_property_path in cardsearch_params.cardsearch_text_paths
or len(_property_path) in cardsearch_params.cardsearch_text_glob_depths
):
for _snippet in _snippets:
yield TextMatchEvidence(
property_path=_property_path,
matching_highlight=rdf.literal(_snippet),
card_iri=card_iri,
)


###
Expand Down
15 changes: 15 additions & 0 deletions trove/trovesearch/search_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,21 @@ def cardsearch_type_iris(self):
if _filter.is_type_filter():
yield from _filter.value_set

@functools.cached_property
def cardsearch_text_paths(self) -> PropertypathSet:
    """Collect every property path named by any card-search text segment.

    Evaluated once per instance (``functools.cached_property``).

    Returns:
        A frozenset holding the members of each segment's
        ``propertypath_set`` for all segments in
        ``self.cardsearch_textsegment_set``; empty when there are
        no text segments.
    """
    return frozenset(
        _path
        for _segment in self.cardsearch_textsegment_set
        for _path in _segment.propertypath_set
    )

@functools.cached_property
def cardsearch_text_glob_depths(self) -> frozenset[int]:
    """Lengths of the glob paths among ``self.cardsearch_text_paths``.

    Evaluated once per instance (``functools.cached_property``).

    Returns:
        A frozenset of ``len(path)`` for each path in
        ``self.cardsearch_text_paths`` that ``is_globpath`` accepts;
        empty when no requested text path is a glob path.
    """
    # keep only the glob paths, then record each one's length
    _glob_paths = filter(is_globpath, self.cardsearch_text_paths)
    return frozenset(map(len, _glob_paths))

def to_querydict(self) -> QueryDict:
_querydict = super().to_querydict()
for _qp_name, _qp_value in Textsegment.queryparams_from_textsegments('cardSearchText', self.cardsearch_textsegment_set):
Expand Down

0 comments on commit 27c05fa

Please sign in to comment.