Skip to content

Commit

Permalink
Prevent whitespace added around <em> in regex search (#1710)
Browse files (browse the repository at this point in the history)
  • Loading branch information
blms committed Jan 13, 2025
1 parent d15759c commit d56ad15
Showing 1 changed file with 31 additions and 29 deletions.
60 changes: 31 additions & 29 deletions geniza/corpus/solr_queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,35 +379,37 @@ def get_highlighting(self):
if highlighted_block
]
}
else:
is_exact_search = "hl_query" in self.raw_params
for doc in highlights.keys():
# _nostem fields should take precedence over stemmed fields in the case of an
# exact search; in that case, replace highlights for stemmed fields with nostem
if is_exact_search and "description_nostem" in highlights[doc]:
highlights[doc]["description"] = highlights[doc][
"description_nostem"
]
if is_exact_search and "transcription_nostem" in highlights[doc]:
highlights[doc]["transcription"] = [
clean_html(s) for s in highlights[doc]["transcription_nostem"]
]
elif "transcription" in highlights[doc]:
highlights[doc]["transcription"] = [
clean_html(s) for s in highlights[doc]["transcription"]
]
if "translation" in highlights[doc]:
highlights[doc]["translation"] = [
clean_html(s) for s in highlights[doc]["translation"]
]

is_exact_search = "hl_query" in self.raw_params
for doc in highlights.keys():
# _nostem fields should take precedence over stemmed fields in the case of an
# exact search; in that case, replace highlights for stemmed fields with nostem
if is_exact_search and "description_nostem" in highlights[doc]:
highlights[doc]["description"] = highlights[doc]["description_nostem"]
if is_exact_search and "transcription_nostem" in highlights[doc]:
highlights[doc]["transcription"] = [
clean_html(s) for s in highlights[doc]["transcription_nostem"]
]
elif "transcription" in highlights[doc]:
highlights[doc]["transcription"] = [
clean_html(s) for s in highlights[doc]["transcription"]
]
if "translation" in highlights[doc]:
highlights[doc]["translation"] = [
clean_html(s) for s in highlights[doc]["translation"]
]

# handle old shelfmark highlighting; sometimes it's on one or the other
# field, and sometimes one of the highlight results is empty
if "old_shelfmark" in highlights[doc]:
highlights[doc]["old_shelfmark"] = ", ".join(
[h for h in highlights[doc]["old_shelfmark"] if h]
)
elif "old_shelfmark_t" in highlights[doc]:
highlights[doc]["old_shelfmark"] = ", ".join(
[h for h in highlights[doc]["old_shelfmark_t"] if h]
)
# handle old shelfmark highlighting; sometimes it's on one or the other
# field, and sometimes one of the highlight results is empty
if "old_shelfmark" in highlights[doc]:
highlights[doc]["old_shelfmark"] = ", ".join(
[h for h in highlights[doc]["old_shelfmark"] if h]
)
elif "old_shelfmark_t" in highlights[doc]:
highlights[doc]["old_shelfmark"] = ", ".join(
[h for h in highlights[doc]["old_shelfmark_t"] if h]
)

return highlights

0 comments on commit d56ad15

Please sign in to comment.