diff --git a/geniza/corpus/solr_queryset.py b/geniza/corpus/solr_queryset.py index 8e7bf136b..97338e681 100644 --- a/geniza/corpus/solr_queryset.py +++ b/geniza/corpus/solr_queryset.py @@ -379,35 +379,37 @@ def get_highlighting(self): if highlighted_block ] } + else: + is_exact_search = "hl_query" in self.raw_params + for doc in highlights.keys(): + # _nostem fields should take precedence over stemmed fields in the case of an + # exact search; in that case, replace highlights for stemmed fields with nostem + if is_exact_search and "description_nostem" in highlights[doc]: + highlights[doc]["description"] = highlights[doc][ + "description_nostem" + ] + if is_exact_search and "transcription_nostem" in highlights[doc]: + highlights[doc]["transcription"] = [ + clean_html(s) for s in highlights[doc]["transcription_nostem"] + ] + elif "transcription" in highlights[doc]: + highlights[doc]["transcription"] = [ + clean_html(s) for s in highlights[doc]["transcription"] + ] + if "translation" in highlights[doc]: + highlights[doc]["translation"] = [ + clean_html(s) for s in highlights[doc]["translation"] + ] - is_exact_search = "hl_query" in self.raw_params - for doc in highlights.keys(): - # _nostem fields should take precedence over stemmed fields in the case of an - # exact search; in that case, replace highlights for stemmed fields with nostem - if is_exact_search and "description_nostem" in highlights[doc]: - highlights[doc]["description"] = highlights[doc]["description_nostem"] - if is_exact_search and "transcription_nostem" in highlights[doc]: - highlights[doc]["transcription"] = [ - clean_html(s) for s in highlights[doc]["transcription_nostem"] - ] - elif "transcription" in highlights[doc]: - highlights[doc]["transcription"] = [ - clean_html(s) for s in highlights[doc]["transcription"] - ] - if "translation" in highlights[doc]: - highlights[doc]["translation"] = [ - clean_html(s) for s in highlights[doc]["translation"] - ] - - # handle old shelfmark highlighting; 
sometimes it's on one or the other - # field, and sometimes one of the highlight results is empty - if "old_shelfmark" in highlights[doc]: - highlights[doc]["old_shelfmark"] = ", ".join( - [h for h in highlights[doc]["old_shelfmark"] if h] - ) - elif "old_shelfmark_t" in highlights[doc]: - highlights[doc]["old_shelfmark"] = ", ".join( - [h for h in highlights[doc]["old_shelfmark_t"] if h] - ) + # handle old shelfmark highlighting; sometimes it's on one or the other + # field, and sometimes one of the highlight results is empty + if "old_shelfmark" in highlights[doc]: + highlights[doc]["old_shelfmark"] = ", ".join( + [h for h in highlights[doc]["old_shelfmark"] if h] + ) + elif "old_shelfmark_t" in highlights[doc]: + highlights[doc]["old_shelfmark"] = ", ".join( + [h for h in highlights[doc]["old_shelfmark_t"] if h] + ) return highlights diff --git a/geniza/corpus/tests/test_corpus_solrqueryset.py b/geniza/corpus/tests/test_corpus_solrqueryset.py index 138384f73..046ee7571 100644 --- a/geniza/corpus/tests/test_corpus_solrqueryset.py +++ b/geniza/corpus/tests/test_corpus_solrqueryset.py @@ -383,11 +383,22 @@ def test_get_highlighting__regex(self): mock_get_results.return_value = [ {"id": "document.1", "transcription_regex": ["a test text"]} ] - highlighting = dqs.get_highlighting() - assert highlighting != test_highlight - assert "match" not in highlighting["document.1"]["transcription"] - assert len(highlighting["document.1"]["transcription"]) == 1 - assert "test" in highlighting["document.1"]["transcription"][0] + with patch("geniza.corpus.solr_queryset.clean_html") as mock_clean_html: + highlighting = dqs.get_highlighting() + assert highlighting != test_highlight + assert "match" not in highlighting["document.1"]["transcription"] + assert len(highlighting["document.1"]["transcription"]) == 1 + assert ( + "test" + in highlighting["document.1"]["transcription"][0] + ) + # in regex, clean_html should not be called + mock_clean_html.assert_not_called + # it should 
still be called in other types of searches + mock_get_results.return_value = [ + {"id": "document.1", "transcription_nostem": ["a test text"]} + ] + mock_clean_html.assert_called_once def test_regex_search(self): dqs = DocumentSolrQuerySet()