From 39114d52a4d1a176ee8d352c0460a74866f1f1fe Mon Sep 17 00:00:00 2001 From: Ben Silverman Date: Fri, 10 Jan 2025 15:24:11 -0500 Subject: [PATCH 01/26] Add keyword search on people's names (#1711) --- geniza/entities/forms.py | 15 +++ geniza/entities/models.py | 33 +++++++ .../templates/entities/person_list.html | 27 +++++- geniza/entities/tests/test_entities_models.py | 60 +++++++++++- geniza/entities/tests/test_entities_views.py | 41 ++++++++ geniza/entities/views.py | 17 +++- sitemedia/js/controllers/search_controller.js | 66 ++++++++++++- sitemedia/scss/components/_peopleform.scss | 4 + sitemedia/scss/components/_searchform.scss | 96 ++++++++++--------- sitemedia/scss/pages/_people.scss | 30 ++++++ sitemedia/scss/pages/_search.scss | 5 +- solr_conf/conf/managed-schema | 6 ++ solr_conf/conf/solrconfig.xml | 15 +++ 13 files changed, 359 insertions(+), 56 deletions(-) diff --git a/geniza/entities/forms.py b/geniza/entities/forms.py index d1552c4df..86e7be2f1 100644 --- a/geniza/entities/forms.py +++ b/geniza/entities/forms.py @@ -117,6 +117,19 @@ class Meta: class PersonListForm(RangeForm): + q = forms.CharField( + label="Keyword or Phrase", + required=False, + widget=forms.TextInput( + attrs={ + # Translators: placeholder for people keyword search input + "placeholder": _("Search for people by name"), + # Translators: accessible label for people keyword search input + "aria-label": _("word or phrase"), + "type": "search", + } + ), + ) gender = FacetChoiceField(label=_("Gender")) has_page = BooleanFacetField(label=_("Detail page available")) social_role = FacetChoiceField(label=_("Social role")) @@ -125,6 +138,8 @@ class PersonListForm(RangeForm): date_range = RangeField(label=_("Dates"), required=False, widget=YearRangeWidget()) SORT_CHOICES = [ + # Translators: label for sort by relevance + ("relevance", _("Relevance")), # Translators: label for sort by name ("name", _("Name")), # Translators: label for sort by person activity dates diff --git 
a/geniza/entities/models.py b/geniza/entities/models.py index 2086d4566..73cedf4c3 100644 --- a/geniza/entities/models.py +++ b/geniza/entities/models.py @@ -16,6 +16,7 @@ from django.db.models.query import Prefetch from django.forms import ValidationError from django.urls import reverse +from django.utils.html import strip_tags from django.utils.translation import gettext as _ from gfklookupwidget.fields import GfkLookupField from parasolr.django import AliasedSolrQuerySet @@ -871,6 +872,7 @@ def index_data(self): # basic metadata "slug_s": self.slug, "name_s": str(self), + "other_names_ss": [n.name for n in self.names.non_primary()], "description_txt": self.description_en, "gender_s": self.get_gender_display(), "role_s": self.role.name_en if self.role else None, @@ -951,8 +953,12 @@ class PersonSolrQuerySet(AliasedSolrQuerySet): #: map readable field names to actual solr fields field_aliases = { + "id": "id", # needed to match results with highlighting "slug": "slug_s", "name": "name_s", + # need access to these other_names fields for highlighting + "other_names_nostem": "other_names_nostem", + "other_names_bigram": "other_names_bigram", "description": "description_txt", "gender": "gender_s", "role": "role_s", @@ -965,6 +971,33 @@ class PersonSolrQuerySet(AliasedSolrQuerySet): "has_page": "has_page_b", } + keyword_search_qf = "{!type=edismax qf=$people_qf pf=$people_pf v=$keyword_query}" + + def keyword_search(self, search_term): + """Allow searching using keywords with the specified query and phrase match + fields, and set the default operator to AND""" + query_params = {"keyword_query": search_term, "q.op": "AND"} + return self.search(self.keyword_search_qf).raw_query_parameters( + **query_params, + ) + + def get_highlighting(self): + """dedupe highlights across variant fields (e.g. 
for other_names)""" + highlights = super().get_highlighting() + for person in highlights.keys(): + other_names = set() + # iterate through other_names_* fields to get all matches + for hls in [ + highlights[person][field] + for field in highlights[person].keys() + if field.startswith("other_names_") + ]: + # strip highlight tags and whitespace, then add to set + cleaned_names = [strip_tags(hl.strip()) for hl in hls] + other_names.update(set(cleaned_names)) + highlights[person]["other_names"] = [n for n in other_names if n] + return highlights + class PastPersonSlug(models.Model): """A slug that was previously associated with a :class:`Person`; diff --git a/geniza/entities/templates/entities/person_list.html b/geniza/entities/templates/entities/person_list.html index 3197c9b5d..65c8d6f41 100644 --- a/geniza/entities/templates/entities/person_list.html +++ b/geniza/entities/templates/entities/person_list.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% load static i18n humanize widget_tweaks %} +{% load static i18n humanize corpus_extras widget_tweaks %} {% block meta_title %}{{ page_title }}{% endblock meta_title %} {% block meta_description %}{{ page_description }}{% endblock meta_description %} @@ -8,6 +8,13 @@

{{ page_title }}

+
+ {% render_field form.q data-search-target="query" data-action="input->search#autoUpdateRadioSort change->search#update" %} + + {# Translators: Search submit button #} + {% translate 'Submit search' as search_label %} +