diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bd727ffd3..9d6c4fac1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,23 @@ Change Log ========== +4.20 +---- + +- public site + - As a public user, I want a keyword search in the people module so that I can easily find people entries. + - As a public user, I want to see related places on a place page on the main individual place page (not in a separate tab) so that I can see associated neighborhoods and also places that have similar names but are distinct + - As a public site user, I want any JA or Arabic search to link to the equivalent search on the Arabic Papyrology Database website, so that I can find additional content not present in the PGP. + - As a public user and content admin, I want to see two separate automatic date fields for people: one of only documents where they are mentioned as deceased and one with all other dated person-doc relations, so that I have a better understanding of a person's active dates and their afterlives in the documentary record. + - bugfix: Translations in Hebrew script do not pick up correct Hebrew font + +- admin + - As a content admin, I want to be able to merge person-to-person relationship types, so that I can combine duplicates or revise categorization. + - As a content admin, I want to merge person-document relationship types, so that I can keep the website current as our thinking changes (but without losing data) + - As a content editor, I want to be able to tag people with various group names so that I can sort them in another way/portray more information on the public site. + - As a content admin, I want the ability to enter asymmetrical place-place relations, so that I can adapt to changes in the way we sort and represent data (e.g. representing a neighborhood within a place). + - As a content admin, when merging documents (for joins) I want to see image thumbnails of each document so I can be sure the join is correct. + 4.19 ---- diff --git a/DEPLOYNOTES.md b/DEPLOYNOTES.md index 304e17ba7..a862144ab 100644 --- a/DEPLOYNOTES.md +++ b/DEPLOYNOTES.md @@ -1,5 +1,10 @@ # Deploy Notes +## 4.20 + +- Solr configuration has changed. Ensure Solr configset has been updated + and then reindex all content: `python manage.py index` + ## 4.19 - Indexing logic has changed. Reindex all content: `python manage.py index`. diff --git a/geniza/__init__.py b/geniza/__init__.py index bdefd609b..18c6f94b9 100644 --- a/geniza/__init__.py +++ b/geniza/__init__.py @@ -1,4 +1,4 @@ -__version_info__ = (4, 19, 0, None) +__version_info__ = (4, 20, 0, None) # Dot-connect all but the last. Last is dash-connected if not None. diff --git a/geniza/common/models.py b/geniza/common/models.py index 4d8b2a019..cd5bb788e 100644 --- a/geniza/common/models.py +++ b/geniza/common/models.py @@ -2,6 +2,7 @@ from django.contrib.auth.models import User from django.db import models +from django.db.models.functions.text import Lower from django.utils.safestring import mark_safe from modeltranslation.utils import fallbacks @@ -110,3 +111,17 @@ def objects_by_label(cls): (obj.display_label_en or obj.name_en): obj for obj in cls.objects.all() } + + +class TaggableMixin: + """Mixin for taggable models with convenience functions for generating lists of tags""" + + def all_tags(self): + """comma delimited string of all tags for this instance""" + return ", ".join(t.name for t in self.tags.all()) + + all_tags.short_description = "tags" + + def alphabetized_tags(self): + """tags in alphabetical order, case-insensitive sorting""" + return self.tags.order_by(Lower("name")) diff --git a/geniza/corpus/models.py b/geniza/corpus/models.py index c056cf875..0e1b91f06 100644 --- a/geniza/corpus/models.py +++ b/geniza/corpus/models.py @@ -18,7 +18,6 @@ from django.core.validators import RegexValidator from django.db import models from django.db.models.functions import Concat -from django.db.models.functions.text import Lower from django.db.models.query import Prefetch from django.db.models.signals import pre_delete from django.dispatch import receiver @@ -41,6 +40,7 @@ from geniza.annotations.models import Annotation from geniza.common.models import ( DisplayLabelMixin, + TaggableMixin, TrackChangesModel, cached_class_property, ) @@ -521,7 +521,7 @@ def permalink(self): return absolutize_url(self.get_absolute_url().replace(f"/{lang}/", "/")) -class Document(ModelIndexable, DocumentDateMixin, PermalinkMixin): +class Document(ModelIndexable, DocumentDateMixin, PermalinkMixin, TaggableMixin): """A unified document such as a letter or legal document that appears on one or more fragments.""" @@ -749,16 +749,6 @@ def formatted_citation(self): f"{long_name}. {available_at} {self.permalink}, accessed {today}." ) - def all_tags(self): - """comma delimited string of all tags for this document""" - return ", ".join(t.name for t in self.tags.all()) - - all_tags.short_description = "tags" - - def alphabetized_tags(self): - """tags in alphabetical order, case-insensitive sorting""" - return self.tags.order_by(Lower("name")) - def is_public(self): """admin display field indicating if doc is public or suppressed""" return self.status == self.PUBLIC diff --git a/geniza/corpus/templates/corpus/document_list.html b/geniza/corpus/templates/corpus/document_list.html index d47a68366..61e43fd1f 100644 --- a/geniza/corpus/templates/corpus/document_list.html +++ b/geniza/corpus/templates/corpus/document_list.html @@ -157,6 +157,12 @@

{% if is_paginated %} {% include "corpus/snippets/pagination.html" %} {% endif %} + {% if apd_link %} + + {# translators: Link to search a document query on the Arabic Papyrology Database #} + {% translate 'View results in the Arabic Papyrology Database' %} + + {% endif %} {% endblock main %} diff --git a/geniza/corpus/templates/corpus/snippets/document_option_label.html b/geniza/corpus/templates/corpus/snippets/document_option_label.html index d071719ad..142bdf06d 100644 --- a/geniza/corpus/templates/corpus/snippets/document_option_label.html +++ b/geniza/corpus/templates/corpus/snippets/document_option_label.html @@ -1,10 +1,11 @@ +{% load corpus_extras %} {# template snippet for displaying document label on a form #} {# used on document merge form to provide enough information to merge accurately #}

{{ document }} + title="Go to this document's admin edit page"> Change

{% if document.description %} @@ -32,6 +33,12 @@

{{ document }}
{{ document.needs_review }}

{% endif %} + {% if document.iiif_images %} +
+ + {{ document.admin_thumbnails }} +
+ {% endif %} {% if document.footnotes.count %}
@@ -43,8 +50,8 @@

{{ document }}
{{ source.grouper.formatted_display|safe }} Change + title="Go to this source's admin edit page">Change {% if source.grouper.source_type.type == "Unpublished" %}
unpublished
{% endif %} diff --git a/geniza/corpus/tests/test_corpus_views.py b/geniza/corpus/tests/test_corpus_views.py index 8fd3a9618..abade1486 100644 --- a/geniza/corpus/tests/test_corpus_views.py +++ b/geniza/corpus/tests/test_corpus_views.py @@ -1339,6 +1339,20 @@ def test_hebrew_prefix_highlight(self, source, empty_solr): 0 ] == clean_html("מרכב") + def test_get_apd_link(self): + dsv = DocumentSearchView(kwargs={}) + + # no arabic or ja: bail out + assert not dsv.get_apd_link(None) + assert not dsv.get_apd_link("test") + + # arabic: leave as is + arabic = "العبد" + assert dsv.get_apd_link(arabic) == f"{dsv.apd_base_url}{arabic}" + + # JA: translate with regex + assert dsv.get_apd_link("ואגב") == f"{dsv.apd_base_url}وا[غج]ب" + class TestDocumentScholarshipView: def test_page_title(self, document, client, source): diff --git a/geniza/corpus/views.py b/geniza/corpus/views.py index c55508a98..fdea4268e 100644 --- a/geniza/corpus/views.py +++ b/geniza/corpus/views.py @@ -34,6 +34,7 @@ from geniza.common.utils import absolutize_url from geniza.corpus import iiif_utils from geniza.corpus.forms import DocumentMergeForm, DocumentSearchForm, TagMergeForm +from geniza.corpus.ja import contains_arabic, contains_hebrew, ja_arabic_chars from geniza.corpus.models import Document, TextBlock from geniza.corpus.solr_queryset import DocumentSolrQuerySet from geniza.corpus.templatetags import corpus_extras @@ -353,6 +354,26 @@ def get_paginate_by(self, queryset): pass return paginate_by + # base url for APD searches + apd_base_url = "https://www.apd.gwi.uni-muenchen.de/apd/asearch.jsp?searchtable1=601&showdwords=true&searchwordstring1=" + + def get_apd_link(self, query): + """Generate a link to the Arabic Papyrology Database (APD) search page + using the entered query, converting any Hebrew script to Arabic with Regex""" + if not query or not (contains_arabic(query) or contains_hebrew(query)): + # if no arabic OR hebrew in query, bail out + return None + # simplified version of ja_to_arabic that uses regex instead of solr OR + for k, v in ja_arabic_chars.items(): + if type(v) == list: + # list means there is more than one option, so join options with regex + query = re.sub(k, f"[{''.join(v)}]", query) + elif type(v) == str: + # only one possible translation + query = re.sub(k, v, query) + query = query.strip() + return f"{self.apd_base_url}{query}" + def get_context_data(self, **kwargs): """extend context data to add page metadata, highlighting, and update form with facets""" @@ -387,6 +408,7 @@ def get_context_data(self, **kwargs): "page_includes_transcriptions": True, # preload transcription font "highlighting": highlights, "applied_filters": self.applied_filter_labels, + "apd_link": self.get_apd_link(context_data["form"].data.get("q", None)), } ) diff --git a/geniza/entities/admin.py b/geniza/entities/admin.py index 307595445..fbb5a9e71 100644 --- a/geniza/entities/admin.py +++ b/geniza/entities/admin.py @@ -50,7 +50,11 @@ PlacePlaceRelation, PlacePlaceRelationType, ) -from geniza.entities.views import PersonMerge +from geniza.entities.views import ( + PersonDocumentRelationTypeMerge, + PersonMerge, + PersonPersonRelationTypeMerge, +) from geniza.footnotes.models import Footnote @@ -263,6 +267,14 @@ class PersonEventInline(admin.TabularInline): class PersonAdmin(TabbedTranslationAdmin, SortableAdminBase, admin.ModelAdmin): """Admin for Person entities in the PGP""" + list_display = ( + "__str__", + "slug", + "gender", + "role", + "all_tags", + "has_page", + ) search_fields = ("name_unaccented", "names__name") fields = ( "slug", @@ -270,10 +282,12 @@ class PersonAdmin(TabbedTranslationAdmin, SortableAdminBase, admin.ModelAdmin): "role", "has_page", "date", - "automatic_date", + "active_dates", + "deceased_mention_dates", "description", + "tags", ) - readonly_fields = ("automatic_date",) + readonly_fields = ("active_dates", "deceased_mention_dates") inlines = ( NameInline, FootnoteInline, @@ -393,9 +407,13 @@ def get_urls(self): ] return urls + super().get_urls() - def automatic_date(self, obj): - """Display automatically generated date/date range for an event as a formatted string""" - return standard_date_display(obj.documents_date_range) + def active_dates(self, obj): + """Display automatically generated active date/date range for a person as a formatted string""" + return standard_date_display(obj.active_date_range) + + def deceased_mention_dates(self, obj): + """Display automatically generated deceased date/date range for a person as a formatted string""" + return standard_date_display(obj.deceased_date_range) actions = (export_to_csv, merge_people) @@ -409,22 +427,69 @@ class RoleAdmin(TabbedTranslationAdmin, admin.ModelAdmin): ordering = ("display_label", "name") +class RelationTypeMergeAdminMixin: + @admin.display(description="Merge selected %(verbose_name_plural)s") + def merge_relation_types(self, request, queryset=None): + """Admin action to merge selected entity-entity relation types. This + action redirects to an intermediate page, which displays a form to + review for confirmation and choose the primary type before merging. + """ + selected = request.POST.getlist("_selected_action") + if len(selected) < 2: + messages.error( + request, + "You must select at least two person-person relationships to merge", + ) + return HttpResponseRedirect( + reverse("admin:entities_%s_changelist" % self.model._meta.model_name) + ) + return HttpResponseRedirect( + "%s?ids=%s" + % ( + reverse(f"admin:{self.merge_path_name}"), + ",".join(selected), + ), + status=303, + ) # status code 303 means "See Other" + + def get_urls(self): + """Return admin urls; adds custom URL for merging""" + urls = [ + path( + "merge/", + self.view_class.as_view(), + name=self.merge_path_name, + ), + ] + return urls + super().get_urls() + + actions = (merge_relation_types,) + + @admin.register(PersonDocumentRelationType) -class PersonDocumentRelationTypeAdmin(TabbedTranslationAdmin, admin.ModelAdmin): +class PersonDocumentRelationTypeAdmin( + RelationTypeMergeAdminMixin, TabbedTranslationAdmin, admin.ModelAdmin +): """Admin for managing the controlled vocabulary of people's relationships to documents""" fields = ("name",) search_fields = ("name",) ordering = ("name",) + merge_path_name = "person-document-relation-type-merge" + view_class = PersonDocumentRelationTypeMerge @admin.register(PersonPersonRelationType) -class PersonPersonRelationTypeAdmin(TabbedTranslationAdmin, admin.ModelAdmin): +class PersonPersonRelationTypeAdmin( + RelationTypeMergeAdminMixin, TabbedTranslationAdmin, admin.ModelAdmin +): """Admin for managing the controlled vocabulary of people's relationships to other people""" fields = ("name", "converse_name", "category") search_fields = ("name",) ordering = ("name",) + merge_path_name = "person-person-relation-type-merge" + view_class = PersonPersonRelationTypeMerge @admin.register(PersonPlaceRelationType) @@ -477,14 +542,18 @@ class PlacePlaceReverseInline(admin.TabularInline): verbose_name_plural = "Related Places (automatically populated)" fields = ( "place_a", - "type", + "relation", "notes", ) fk_name = "place_b" - readonly_fields = ("place_a", "type", "notes") + readonly_fields = ("place_a", "relation", "notes") extra = 0 max_num = 0 + def relation(self, obj=None): + """Get the relationship type's converse name, if it exists, or else the type name""" + return (obj.type.converse_name or str(obj.type)) if obj else None + class PlaceEventInline(admin.TabularInline): """Inline for events related to a place""" @@ -595,7 +664,7 @@ def get_urls(self): class PlacePlaceRelationTypeAdmin(TabbedTranslationAdmin, admin.ModelAdmin): """Admin for managing the controlled vocabulary of places' relationships to other places""" - fields = ("name",) + fields = ("name", "converse_name") search_fields = ("name",) ordering = ("name",) diff --git a/geniza/entities/forms.py b/geniza/entities/forms.py index d1552c4df..9127e1578 100644 --- a/geniza/entities/forms.py +++ b/geniza/entities/forms.py @@ -10,6 +10,7 @@ PersonDocumentRelationType, PersonEventRelation, PersonPersonRelation, + PersonPersonRelationType, PersonPlaceRelation, PersonRole, PlaceEventRelation, @@ -54,6 +55,74 @@ def __init__(self, *args, **kwargs): ) +class RelationTypeMergeFormMixin: + def __init__(self, *args, **kwargs): + ids = kwargs.get("ids", []) + + # Remove the added kwarg so that the super method doesn't error + try: + del kwargs["ids"] + except KeyError: + pass + + super().__init__(*args, **kwargs) + self.fields[ + "primary_relation_type" + ].queryset = self.reltype_model.objects.filter(id__in=ids) + + +class PersonDocumentRelationTypeChoiceField(forms.ModelChoiceField): + """Add a summary of each PersonDocumentRelationType to a form (used for merging)""" + + label_template = get_template( + "entities/snippets/persondocumentrelationtype_option_label.html" + ) + + def label_from_instance(self, relation_type): + return self.label_template.render({"relation_type": relation_type}) + + +class PersonDocumentRelationTypeMergeForm(RelationTypeMergeFormMixin, forms.Form): + primary_relation_type = PersonDocumentRelationTypeChoiceField( + label="Select primary person-document relationship", + queryset=None, + help_text=( + "Select the primary person-document relationship, which will be " + "used as the canonical entry. All associated relations and log " + "entries will be combined on the primary relationship." + ), + empty_label=None, + widget=forms.RadioSelect, + ) + reltype_model = PersonDocumentRelationType + + +class PersonPersonRelationTypeChoiceField(forms.ModelChoiceField): + """Add a summary of each PersonPersonRelationType to a form (used for merging)""" + + label_template = get_template( + "entities/snippets/personpersonrelationtype_option_label.html" + ) + + def label_from_instance(self, relation_type): + return self.label_template.render({"relation_type": relation_type}) + + +class PersonPersonRelationTypeMergeForm(RelationTypeMergeFormMixin, forms.Form): + primary_relation_type = PersonPersonRelationTypeChoiceField( + label="Select primary person-person relationship", + queryset=None, + help_text=( + "Select the primary person-person relationship, which will be " + "used as the canonical entry. All associated relations and log " + "entries will be combined on the primary relationship." + ), + empty_label=None, + widget=forms.RadioSelect, + ) + reltype_model = PersonPersonRelationType + + class PersonPersonForm(forms.ModelForm): class Meta: model = PersonPersonRelation @@ -117,6 +186,19 @@ class Meta: class PersonListForm(RangeForm): + q = forms.CharField( + label="Keyword or Phrase", + required=False, + widget=forms.TextInput( + attrs={ + # Translators: placeholder for people keyword search input + "placeholder": _("Search for people by name"), + # Translators: accessible label for people keyword search input + "aria-label": _("word or phrase"), + "type": "search", + } + ), + ) gender = FacetChoiceField(label=_("Gender")) has_page = BooleanFacetField(label=_("Detail page available")) social_role = FacetChoiceField(label=_("Social role")) @@ -125,6 +207,8 @@ class PersonListForm(RangeForm): date_range = RangeField(label=_("Dates"), required=False, widget=YearRangeWidget()) SORT_CHOICES = [ + # Translators: label for sort by relevance + ("relevance", _("Relevance")), # Translators: label for sort by name ("name", _("Name")), # Translators: label for sort by person activity dates diff --git a/geniza/entities/metadata_export.py b/geniza/entities/metadata_export.py index 06caad296..a07c7e92b 100644 --- a/geniza/entities/metadata_export.py +++ b/geniza/entities/metadata_export.py @@ -45,6 +45,7 @@ class PublicPersonExporter(Exporter): "auto_date_range", "manual_date_range", "description", + "tags", "related_people_count", "related_documents_count", "url", @@ -82,6 +83,7 @@ def get_queryset(self): "from_person", "to_person", "personplacerelation_set", + "tags", Prefetch( "persondocumentrelation_set", queryset=PersonDocumentRelation.objects.select_related("type"), @@ -112,11 +114,13 @@ def get_export_data_dict(self, person): ), "gender": person.get_gender_display(), "social_role": str(person.role), - "auto_date_range": standard_date_display(person.documents_date_range), + "active_date_range": standard_date_display(person.active_date_range), + "deceased_date_range": standard_date_display(person.deceased_date_range), "manual_date_range": person.date, "description": person.description, "related_people_count": person.related_people_count, "related_documents_count": person.documents.count(), + "tags": person.all_tags(), } # add url if present @@ -605,6 +609,7 @@ def get_queryset(self): related_object_type=Value("Place"), relationship_type_id=F("type"), relationship_notes=F("notes"), + use_converse_typename=Value(True), ) .union( place.place_b.values( @@ -612,6 +617,7 @@ def get_queryset(self): related_object_type=Value("Place"), relationship_type_id=F("type"), relationship_notes=F("notes"), + use_converse_typename=Value(False), ) ) .union( @@ -620,6 +626,7 @@ def get_queryset(self): related_object_type=Value("Person"), relationship_type_id=F("type"), relationship_notes=F("notes"), + use_converse_typename=Value(False), ) ) .union( @@ -628,6 +635,7 @@ def get_queryset(self): related_object_type=Value("Document"), relationship_type_id=F("type"), relationship_notes=F("notes"), + use_converse_typename=Value(False), ) ) .union( @@ -638,6 +646,7 @@ def get_queryset(self): # type for event relations relationship_type_id=Value(-1), relationship_notes=F("notes"), + use_converse_typename=Value(False), ) ) ) @@ -678,7 +687,7 @@ def populate_relation_fields(self, relations): place_relation_types = PlacePlaceRelationType.objects.filter( id__in=[r[RTID] for r in relations if r[TYPE] == "Place"] ).values("id", "name") - place_relation_typedict = {t["id"]: t["name"] for t in place_relation_types} + place_relation_typedict = {t["id"]: t for t in place_relation_types} doc_relation_types = DocumentPlaceRelationType.objects.filter( id__in=[r[RTID] for r in relations if r[TYPE] == "Document"] ).values("id", "name") @@ -784,10 +793,18 @@ def populate_relation_fields(self, relations): and n.get("content_type") == place_contenttype_id, names, ) + rel_type = place_relation_typedict.get(rel[RTID]) rel.update( { "related_object_name": next(filtered_names).get("name"), - "relationship_type": place_relation_typedict.get(rel[RTID]), + "relationship_type": ( + # handle converse type names for self-referential relationships + rel_type.get("converse_name") + if rel["use_converse_typename"] + and rel_type.get("converse_name") + # use name if should use name, or converse does not exist + else rel_type.get("name") + ), "shared_documents": ", ".join( [ docs_dict.get(doc_id, {}).get("name") diff --git a/geniza/entities/migrations/0026_person_tags.py b/geniza/entities/migrations/0026_person_tags.py new file mode 100644 index 000000000..a98a1270a --- /dev/null +++ b/geniza/entities/migrations/0026_person_tags.py @@ -0,0 +1,26 @@ +# Generated by Django 3.2.23 on 2025-01-23 17:51 + +import taggit_selectize.managers +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("taggit", "0003_taggeditem_add_unique_index"), + ("entities", "0025_person_date"), + ] + + operations = [ + migrations.AddField( + model_name="person", + name="tags", + field=taggit_selectize.managers.TaggableManager( + blank=True, + help_text="A comma-separated list of tags.", + related_name="tagged_person", + through="taggit.TaggedItem", + to="taggit.Tag", + verbose_name="Tags", + ), + ), + ] diff --git a/geniza/entities/migrations/0027_placeplacerelation_converse_name.py b/geniza/entities/migrations/0027_placeplacerelation_converse_name.py new file mode 100644 index 000000000..860a33c30 --- /dev/null +++ b/geniza/entities/migrations/0027_placeplacerelation_converse_name.py @@ -0,0 +1,51 @@ +# Generated by Django 3.2.23 on 2025-01-24 18:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("entities", "0026_person_tags"), + ] + + operations = [ + migrations.AddField( + model_name="placeplacerelationtype", + name="converse_name", + field=models.CharField( + blank=True, + help_text="The converse of the relationship, for example, 'City' when Name is 'Neighborhood'. May leave blank if the converse is identical (for example, 'Possibly the same as').", + max_length=255, + ), + ), + migrations.AddField( + model_name="placeplacerelationtype", + name="converse_name_ar", + field=models.CharField( + blank=True, + help_text="The converse of the relationship, for example, 'City' when Name is 'Neighborhood'. May leave blank if the converse is identical (for example, 'Possibly the same as').", + max_length=255, + null=True, + ), + ), + migrations.AddField( + model_name="placeplacerelationtype", + name="converse_name_en", + field=models.CharField( + blank=True, + help_text="The converse of the relationship, for example, 'City' when Name is 'Neighborhood'. May leave blank if the converse is identical (for example, 'Possibly the same as').", + max_length=255, + null=True, + ), + ), + migrations.AddField( + model_name="placeplacerelationtype", + name="converse_name_he", + field=models.CharField( + blank=True, + help_text="The converse of the relationship, for example, 'City' when Name is 'Neighborhood'. May leave blank if the converse is identical (for example, 'Possibly the same as').", + max_length=255, + null=True, + ), + ), + ] diff --git a/geniza/entities/models.py b/geniza/entities/models.py index 2086d4566..62851c8f3 100644 --- a/geniza/entities/models.py +++ b/geniza/entities/models.py @@ -3,7 +3,6 @@ from datetime import datetime from math import modf from operator import itemgetter -from time import sleep from django.conf import settings from django.contrib.admin.models import CHANGE, LogEntry @@ -11,19 +10,21 @@ from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation from django.contrib.contenttypes.models import ContentType from django.core.validators import RegexValidator -from django.db import models +from django.db import IntegrityError, models, transaction from django.db.models import F, Q, Value from django.db.models.query import Prefetch from django.forms import ValidationError from django.urls import reverse +from django.utils.html import strip_tags from django.utils.translation import gettext as _ from gfklookupwidget.fields import GfkLookupField from parasolr.django import AliasedSolrQuerySet from parasolr.django.indexing import ModelIndexable from slugify import slugify +from taggit_selectize.managers import TaggableManager from unidecode import unidecode -from geniza.common.models import TrackChangesModel, cached_class_property +from geniza.common.models import TaggableMixin, TrackChangesModel, cached_class_property from geniza.corpus.dates import DocumentDateMixin, PartialDate, standard_date_display from geniza.corpus.models import ( DisplayLabelMixin, @@ -106,9 +107,13 @@ class DocumentDatableMixin: @property def documents_date_range(self): - """Standardized range of dates across associated documents""" + """Compute the total range of dates across all associated documents""" + return self.get_date_range(self.documents.all()) + + def get_date_range(self, doc_set): + """Standardized range of dates across a set of documents""" full_range = [None, None] - for doc in self.documents.all(): + for doc in doc_set: # get each doc's full range, including inferred and on-document doc_range = doc.dating_range() # if doc has a range, and the range is not [None, None], compare @@ -120,7 +125,6 @@ def documents_date_range(self): full_range = PartialDate.get_date_range( old_range=full_range, new_range=[start, end] ) - # prune Nones and use isoformat for standardized representation full_range = [d.isoformat() for d in full_range if d] if len(full_range) == 2 and full_range[0] == full_range[1]: @@ -312,7 +316,9 @@ def related_delete(sender, instance=None, raw=False, **_kwargs): PersonSignalHandlers.related_change(instance, raw, "delete") -class Person(ModelIndexable, SlugMixin, DocumentDatableMixin, PermalinkMixin): +class Person( + ModelIndexable, SlugMixin, DocumentDatableMixin, PermalinkMixin, TaggableMixin +): """A person entity that appears within the PGP corpus.""" names = GenericRelation(Name, related_query_name="person") @@ -359,6 +365,8 @@ class Person(ModelIndexable, SlugMixin, DocumentDatableMixin, PermalinkMixin): log_entries = GenericRelation(LogEntry, related_query_name="document") + tags = TaggableManager(blank=True, related_name="tagged_person") + # gender options MALE = "M" FEMALE = "F" @@ -409,22 +417,27 @@ def __str__(self): @property def date_str(self): - """Return a formatted string for the person's date range, for use in public site. + """Return a formatted string for the person's active date range, for use in public site. CE date override takes highest precedence, then fallback to computed date range from associated documents if override is unset. """ return ( standard_date_display(self.date) - or standard_date_display(self.documents_date_range) + or standard_date_display(self.active_date_range) or "" ) + @property + def deceased_date_str(self): + """Return a formatted string for the person's mentioned as deceased date range.""" + return standard_date_display(self.deceased_date_range) or "" + def solr_date_range(self): """Return the person's date range as a Solr date range.""" if self.date: solr_dating_range = self.date.split("/") else: - solr_dating_range = self.documents_date_range.split("/") + solr_dating_range = self.active_date_range.split("/") if solr_dating_range: # if a single date instead of a range, just return that date if ( @@ -642,6 +655,27 @@ def related_people(self): return relation_list + @property + def active_date_range(self): + """Standardized range of dates across documents where a person is (presumed) alive""" + relations = self.persondocumentrelation_set.exclude( + type__name__icontains="deceased" + ) + doc_ids = relations.values_list("document__pk", flat=True) + docs = Document.objects.filter(pk__in=doc_ids) + return self.get_date_range(docs) + + @property + def deceased_date_range(self): + """Standardized range of dates across associated documents where the relationship is + 'Mentioned (deceased)'""" + relations = self.persondocumentrelation_set.filter( + type__name__icontains="deceased" + ) + doc_ids = relations.values_list("document__pk", flat=True) + docs = Document.objects.filter(pk__in=doc_ids) + return self.get_date_range(docs) + def merge_with(self, merge_people, user=None): """Merge the specified people into this one. Combines all metadata into this person and creates a log entry documenting the merge. @@ -871,6 +905,7 @@ def index_data(self): # basic metadata "slug_s": self.slug, "name_s": str(self), + "other_names_ss": [n.name for n in self.names.non_primary()], "description_txt": self.description_en, "gender_s": self.get_gender_display(), "role_s": self.role.name_en if self.role else None, @@ -886,6 +921,7 @@ def index_data(self): "type__name_en", flat=True ).distinct() ), + "tags_ss_lower": [t.name for t in self.tags.all()], } ) solr_date_range = self.solr_date_range() @@ -896,7 +932,7 @@ def index_data(self): for date in ( self.date.split("/") if self.date - else self.documents_date_range.split("/") + else self.active_date_range.split("/") ) ] index_data.update( @@ -951,8 +987,12 @@ class PersonSolrQuerySet(AliasedSolrQuerySet): #: map readable field names to actual solr fields field_aliases = { + "id": "id", # needed to match results with highlighting "slug": "slug_s", "name": "name_s", + # need access to these other_names fields for highlighting + "other_names_nostem": "other_names_nostem", + "other_names_bigram": "other_names_bigram", "description": "description_txt", "gender": "gender_s", "role": "role_s", @@ -963,8 +1003,54 @@ class PersonSolrQuerySet(AliasedSolrQuerySet): "document_relations": "document_relation_ss", "date_str": "date_str_s", "has_page": "has_page_b", + "tags": "tags_ss_lower", } + search_aliases = field_aliases.copy() + search_aliases.update( + { + # when searching, singular makes more sense for tags + "tag": field_aliases["tags"], + } + ) + re_solr_fields = re.compile( + r"(%s):" % "|".join(key for key, val in search_aliases.items() if key != val), + flags=re.DOTALL, + ) + + keyword_search_qf = "{!type=edismax qf=$people_qf pf=$people_pf v=$keyword_query}" + + def keyword_search(self, search_term): + """Allow searching using keywords with the specified query and phrase match + fields, and set the default operator to AND""" + if ":" in search_term: + # if any of the field aliases occur with a colon, replace with actual solr field + search_term = self.re_solr_fields.sub( + lambda x: "%s:" % self.search_aliases[x.group(1)], search_term + ) + query_params = {"keyword_query": search_term, "q.op": "AND"} + return self.search(self.keyword_search_qf).raw_query_parameters( + **query_params, + ) + + def get_highlighting(self): + """dedupe highlights across variant fields (e.g. for other_names)""" + highlights = super().get_highlighting() + highlights = {k: v for k, v in highlights.items() if v} + for person in highlights.keys(): + other_names = set() + # iterate through other_names_* fields to get all matches + for hls in [ + highlights[person][field] + for field in highlights[person].keys() + if field.startswith("other_names_") + ]: + # strip highglight tags and whitespace, then add to set + cleaned_names = [strip_tags(hl.strip()) for hl in hls] + other_names.update(set(cleaned_names)) + highlights[person]["other_names"] = [n for n in other_names if n] + return highlights + class PastPersonSlug(models.Model): """A slug that was previously associated with a :class:`Person`; @@ -987,11 +1073,79 @@ def get_by_natural_key(self, name): return self.get(name_en=name) -class PersonDocumentRelationType(models.Model): +class MergeRelationTypesMixin: + """Mixin to include shared merge logic for relation types. + Requires inheriting relation type model to make its relationships + queryset available generically by the method name :meth:`relation_set`""" + + def merge_with(self, merge_relation_types, user=None): + """Merge the specified relation types into this one. Combines all + relationships into this relation type and creates a log entry + documenting the merge. + + Closely adapted from :class:`Person` merge.""" + + # if user is not specified, log entry will be associated with script + if user is None: + user = User.objects.get(username=settings.SCRIPT_USERNAME) + + for rel_type in merge_relation_types: + # combine log entries + for log_entry in rel_type.log_entries.all(): + # annotate and reassociate + # - modify change message to type which object this event applied to + log_entry.change_message = "%s [merged type %s (id = %d)]" % ( + log_entry.get_change_message(), + str(rel_type), + rel_type.pk, + ) + + # - associate with the primary relation type + log_entry.object_id = self.id + log_entry.content_type_id = ContentType.objects.get_for_model( + self.__class__ + ) + log_entry.save() + + # combine relationships + for relationship in rel_type.relation_set(): + # set type of each relationship to primary relation type + relationship.type = self + # handle unique constraint violation (one relationship per type + # between doc and person): only reassign type if it doesn't + # create a duplicate, otherwise delete. + # see https://docs.djangoproject.com/en/3.2/topics/db/transactions/#django.db.transaction.atomic + try: + with transaction.atomic(): + relationship.save() + except IntegrityError: + relationship.delete() + + # save current relation type with changes; delete merged relation types + self.save() + merged_types = ", ".join([str(rel_type) for rel_type in merge_relation_types]) + for rel_type in merge_relation_types: + rel_type.delete() + # create log entry documenting the merge; include rationale + rtype_contenttype = ContentType.objects.get_for_model(self.__class__) + LogEntry.objects.log_action( + user_id=user.id, + content_type_id=rtype_contenttype.pk, + object_id=self.pk, + object_repr=str(self), + change_message="merged with %s" % (merged_types,), + action_flag=CHANGE, + ) + + +class PersonDocumentRelationType(MergeRelationTypesMixin, models.Model): """Controlled vocabulary of people's relationships to documents.""" name = models.CharField(max_length=255, unique=True) objects = PersonDocumentRelationTypeManager() + log_entries = GenericRelation( + LogEntry, related_query_name="persondocumentrelationtype" + ) class Meta: verbose_name = "Person-Document relationship" @@ -1008,6 +1162,10 @@ def objects_by_label(cls): for obj in cls.objects.all() } + def relation_set(self): + # own relationships QuerySet as required by MergeRelationTypesMixin + return self.persondocumentrelation_set.all() + class PersonDocumentRelation(models.Model): """A relationship between a person and a document.""" @@ -1043,7 +1201,7 @@ def get_by_natural_key(self, name): return self.get(name_en=name) -class PersonPersonRelationType(models.Model): +class PersonPersonRelationType(MergeRelationTypesMixin, models.Model): """Controlled vocabulary of people's relationships to other people.""" # name of the relationship @@ -1074,6 +1232,9 @@ class PersonPersonRelationType(models.Model): choices=CATEGORY_CHOICES, ) objects = PersonPersonRelationTypeManager() + log_entries = GenericRelation( + LogEntry, related_query_name="personpersonrelationtype" + ) class Meta: verbose_name = "Person-Person relationship" @@ -1082,6 +1243,10 @@ class Meta: def __str__(self): return self.name + def relation_set(self): + # own relationships QuerySet as required by MergeRelationTypesMixin + return self.personpersonrelation_set.all() + class PersonPersonRelation(models.Model): """A relationship between two people.""" @@ -1284,6 +1449,92 @@ def items_to_index(cls): "names", "documentplacerelation_set", "personplacerelation_set" ) + def related_places(self): + """Set of all places related to this place, with relationship type, + taking into account converse relations""" + + # adapted from Person.related_people + + # gather all relationships with places, both entered from this place and + # entered from the place on the other side of the relationship + place_relations = ( + self.place_a.annotate( + # boolean to indicate if we should use converse or regular relation type name + use_converse_typename=Value(True), + related_slug=F("place_a__slug"), + related_id=F("place_a"), + ).union( # union instead of joins for efficiency + self.place_b.annotate( + use_converse_typename=Value(False), + related_slug=F("place_b__slug"), + related_id=F("place_b"), + ) + ) + # have to use values_list (NOT values) here with one argument, otherwise strange things + # happen, possibly due to union(). TODO: see if this is fixed in later django versions. + .values_list("related_id") + ) + relation_list = [ + { + # this is the order of fields returned by SQL after the annotated union + "id": r[-1], + "slug": r[-2], + "use_converse_typename": r[-3], + "notes": r[-4], + "type_id": r[-5], + } + for r in place_relations + ] + + # folow GenericForeignKey to find primary name for each related place + place_contenttype = ContentType.objects.get_for_model(Place).pk + names = Name.objects.filter( + object_id__in=[r["id"] for r in relation_list], + primary=True, + content_type_id=place_contenttype, + ).values("name", "object_id") + # dict keyed on related place id + names_dict = {n["object_id"]: n["name"] for n in names} + + # grab name and converse_name for each relation type since we may need either + # (name if the relation was entered from self, converse if entered from related place) + types = PlacePlaceRelationType.objects.filter( + pk__in=[r["type_id"] for r in relation_list], + ).values("pk", "name", "converse_name") + # dict keyed on related place id + types_dict = {t["pk"]: t for t in types} + + # update with new data & dedupe + prev_relation = None + # sort by id (dedupe by matching against previous id), then type id for type dedupe + for relation in sorted(relation_list, key=itemgetter("id", "type_id")): + relation.update( + { + # get name from cached queryset dict + "name": names_dict[relation["id"]], + # use type.converse_name if this relation is reverse (and if the type has one) + "type": types_dict[relation["type_id"]][ + "converse_name" if relation["use_converse_typename"] else "name" + ] + # fallback to type.name if converse_name doesn't exist + or types_dict[relation["type_id"]]["name"], + } + ) + # dedupe and combine type and notes + if prev_relation and prev_relation["id"] == relation["id"]: + # dedupe type by string matching since we can't match reverse relations by id + if relation["type"].lower() not in prev_relation["type"].lower(): + prev_relation["type"] += f", {relation['type']}".lower() + # simply combine notes with html line break + prev_relation["notes"] += ( + f"
{relation['notes']}" if relation["notes"] else "" + ) + relation_list.remove(relation) + else: + prev_relation = relation + + return relation_list + def index_data(self): """data for indexing in Solr""" index_data = super().index_data() @@ -1448,6 +1699,15 @@ class PlacePlaceRelationType(models.Model): """Controlled vocabulary of place's relationships to other places.""" name = models.CharField(max_length=255, unique=True) + # converse_name is the relationship in the reverse direction (the semantic converse) + # (example: name = "Neighborhood", converse_name = "City") + converse_name = models.CharField( + max_length=255, + blank=True, + help_text="The converse of the relationship, for example, 'City' when Name is " + + "'Neighborhood'. May leave blank if the converse is identical (for example, " + + "'Possibly the same as').", + ) objects = PlacePlaceRelationTypeManager() class Meta: @@ -1496,7 +1756,12 @@ class Meta: ] def __str__(self): - return f"{self.type} relation: {self.place_a} and {self.place_b}" + relation_type = ( + f"{self.type}-{self.type.converse_name}" + if self.type.converse_name + else self.type + ) + return f"{relation_type} relation: {self.place_a} and {self.place_b}" class PlaceEventRelation(models.Model): diff --git a/geniza/entities/templates/admin/entities/persondocumentrelationtype/merge.html b/geniza/entities/templates/admin/entities/persondocumentrelationtype/merge.html new file mode 100644 index 000000000..fcb4061a4 --- /dev/null +++ b/geniza/entities/templates/admin/entities/persondocumentrelationtype/merge.html @@ -0,0 +1,53 @@ +{% extends 'admin/base_site.html' %} + +{% load admin_urls static %} + +{% block extrastyle %} + {{ block.super }} + + +{% endblock %} + +{% block title %} Merge selected person-document relationships {% endblock %} + +{% block breadcrumbs %} + {% if not is_popup %} + + {% endif %} +{% endblock %} + + +{% block content_title %} +

Merge selected person-document relationships

+

Note: there is no automated way to unmerge! Please review to make sure these relationships should be merged before submitting the form.

+{% endblock %} + +{% block content %} +
+ {% csrf_token %} + {% if form.errors|length > 0 %} +

+ Please correct the error below. +

+ {% endif %} +
+
+ {{ form.primary_relation_type.label_tag }} + {{ form.primary_relation_type }} +

{{ form.primary_relation_type.help_text|safe }}

+
+ +
+ +
+ + +{% endblock %} diff --git a/geniza/entities/templates/admin/entities/personpersonrelationtype/merge.html b/geniza/entities/templates/admin/entities/personpersonrelationtype/merge.html new file mode 100644 index 000000000..880a3f363 --- /dev/null +++ b/geniza/entities/templates/admin/entities/personpersonrelationtype/merge.html @@ -0,0 +1,53 @@ +{% extends 'admin/base_site.html' %} + +{% load admin_urls static %} + +{% block extrastyle %} + {{ block.super }} + + +{% endblock %} + +{% block title %} Merge selected person-person relationships {% endblock %} + +{% block breadcrumbs %} + {% if not is_popup %} + + {% endif %} +{% endblock %} + + +{% block content_title %} +

Merge selected person-person relationships

+

Note: there is no automated way to unmerge! Please review to make sure these relationships should be merged before submitting the form.

+{% endblock %} + +{% block content %} +
+ {% csrf_token %} + {% if form.errors|length > 0 %} +

+ Please correct the error below. +

+ {% endif %} +
+
+ {{ form.primary_relation_type.label_tag }} + {{ form.primary_relation_type }} +

{{ form.primary_relation_type.help_text|safe }}

+
+ +
+ +
+ + +{% endblock %} diff --git a/geniza/entities/templates/entities/person_detail.html b/geniza/entities/templates/entities/person_detail.html index 0aca1533f..8cf768214 100644 --- a/geniza/entities/templates/entities/person_detail.html +++ b/geniza/entities/templates/entities/person_detail.html @@ -33,10 +33,23 @@

{# Translators: label for a person's gender #}
{% translate 'Gender' %}
{{ person.get_gender_display }}
- {% if person.date_str %} - {# Translators: label for a person's active dates in the PGP #} -
{% translate 'Active dates' %}
-
{{ person.date_str }}
+ {% if person.date_str or person.deceased_date_str %} + {# Translators: label for the dates of a person's appearances in PGP documents #} +
{% translate 'Dates' %}
+
+
+ {% if person.date_str %} + {# Translators: label for a person's active dates in the PGP #} +
{% translate 'Active dates' %}
+
{{ person.date_str }}
+ {% endif %} + {% if person.deceased_date_str %} + {# Translators: label for a person's date range when mentioned after death in PGP documents #} +
{% translate 'Posthumous mentions' %}
+
{{ person.deceased_date_str }}
+ {% endif %} +
+
{% endif %} {% if person.role %}
diff --git a/geniza/entities/templates/entities/person_list.html b/geniza/entities/templates/entities/person_list.html index 3197c9b5d..beabf6893 100644 --- a/geniza/entities/templates/entities/person_list.html +++ b/geniza/entities/templates/entities/person_list.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% load static i18n humanize widget_tweaks %} +{% load static i18n humanize corpus_extras widget_tweaks %} {% block meta_title %}{{ page_title }}{% endblock meta_title %} {% block meta_description %}{{ page_description }}{% endblock meta_description %} @@ -8,6 +8,13 @@

{{ page_title }}

+
+ {% render_field form.q data-search-target="query" data-action="input->search#autoUpdateRadioSort change->search#update" %} + + {# Translators: Search submit button #} + {% translate 'Submit search' as search_label %} +