diff --git a/geniza/corpus/solr_queryset.py b/geniza/corpus/solr_queryset.py
index 8e7bf136b..97338e681 100644
--- a/geniza/corpus/solr_queryset.py
+++ b/geniza/corpus/solr_queryset.py
@@ -379,35 +379,37 @@ def get_highlighting(self):
if highlighted_block
]
}
+ else:
+ is_exact_search = "hl_query" in self.raw_params
+ for doc in highlights.keys():
+ # _nostem fields should take precedence over stemmed fields in the case of an
+ # exact search; in that case, replace highlights for stemmed fields with nostem
+ if is_exact_search and "description_nostem" in highlights[doc]:
+ highlights[doc]["description"] = highlights[doc][
+ "description_nostem"
+ ]
+ if is_exact_search and "transcription_nostem" in highlights[doc]:
+ highlights[doc]["transcription"] = [
+ clean_html(s) for s in highlights[doc]["transcription_nostem"]
+ ]
+ elif "transcription" in highlights[doc]:
+ highlights[doc]["transcription"] = [
+ clean_html(s) for s in highlights[doc]["transcription"]
+ ]
+ if "translation" in highlights[doc]:
+ highlights[doc]["translation"] = [
+ clean_html(s) for s in highlights[doc]["translation"]
+ ]
- is_exact_search = "hl_query" in self.raw_params
- for doc in highlights.keys():
- # _nostem fields should take precedence over stemmed fields in the case of an
- # exact search; in that case, replace highlights for stemmed fields with nostem
- if is_exact_search and "description_nostem" in highlights[doc]:
- highlights[doc]["description"] = highlights[doc]["description_nostem"]
- if is_exact_search and "transcription_nostem" in highlights[doc]:
- highlights[doc]["transcription"] = [
- clean_html(s) for s in highlights[doc]["transcription_nostem"]
- ]
- elif "transcription" in highlights[doc]:
- highlights[doc]["transcription"] = [
- clean_html(s) for s in highlights[doc]["transcription"]
- ]
- if "translation" in highlights[doc]:
- highlights[doc]["translation"] = [
- clean_html(s) for s in highlights[doc]["translation"]
- ]
-
- # handle old shelfmark highlighting; sometimes it's on one or the other
- # field, and sometimes one of the highlight results is empty
- if "old_shelfmark" in highlights[doc]:
- highlights[doc]["old_shelfmark"] = ", ".join(
- [h for h in highlights[doc]["old_shelfmark"] if h]
- )
- elif "old_shelfmark_t" in highlights[doc]:
- highlights[doc]["old_shelfmark"] = ", ".join(
- [h for h in highlights[doc]["old_shelfmark_t"] if h]
- )
+ # handle old shelfmark highlighting; sometimes it's on one or the other
+ # field, and sometimes one of the highlight results is empty
+ if "old_shelfmark" in highlights[doc]:
+ highlights[doc]["old_shelfmark"] = ", ".join(
+ [h for h in highlights[doc]["old_shelfmark"] if h]
+ )
+ elif "old_shelfmark_t" in highlights[doc]:
+ highlights[doc]["old_shelfmark"] = ", ".join(
+ [h for h in highlights[doc]["old_shelfmark_t"] if h]
+ )
return highlights
diff --git a/geniza/corpus/tests/test_corpus_solrqueryset.py b/geniza/corpus/tests/test_corpus_solrqueryset.py
index 138384f73..046ee7571 100644
--- a/geniza/corpus/tests/test_corpus_solrqueryset.py
+++ b/geniza/corpus/tests/test_corpus_solrqueryset.py
@@ -383,11 +383,22 @@ def test_get_highlighting__regex(self):
mock_get_results.return_value = [
{"id": "document.1", "transcription_regex": ["a test text"]}
]
- highlighting = dqs.get_highlighting()
- assert highlighting != test_highlight
- assert "match" not in highlighting["document.1"]["transcription"]
- assert len(highlighting["document.1"]["transcription"]) == 1
- assert "test" in highlighting["document.1"]["transcription"][0]
+ with patch("geniza.corpus.solr_queryset.clean_html") as mock_clean_html:
+ highlighting = dqs.get_highlighting()
+ assert highlighting != test_highlight
+ assert "match" not in highlighting["document.1"]["transcription"]
+ assert len(highlighting["document.1"]["transcription"]) == 1
+ assert (
+ "test"
+ in highlighting["document.1"]["transcription"][0]
+ )
+ # in regex, clean_html should not be called
+ mock_clean_html.assert_not_called
+ # it should still be called in other types of searches
+ mock_get_results.return_value = [
+ {"id": "document.1", "transcription_nostem": ["a test text"]}
+ ]
+ mock_clean_html.assert_called_once
def test_regex_search(self):
dqs = DocumentSolrQuerySet()