From 034d5e7e327b51c1e7b2f325230a0fde046ffee9 Mon Sep 17 00:00:00 2001
From: Daniel Rabstejnek <rabstejnek@gmail.com>
Date: Tue, 4 Feb 2025 09:04:13 -0500
Subject: [PATCH 1/5] Initial code for deduplication workflow

---
 hawc/apps/lit/constants.py                    |  4 +
 ...0025_dedupesettings_duplicatecandidates.py | 84 +++++++++++++++++++
 hawc/apps/lit/models.py                       | 66 +++++++++++++++
 .../lit/templates/_duplicate_candidates.html  |  1 +
 .../templates/lit/duplicate_candidates.html   | 14 ++++
 hawc/apps/lit/urls.py                         |  5 ++
 hawc/apps/lit/views.py                        | 17 ++++
 7 files changed, 191 insertions(+)
 create mode 100644 hawc/apps/lit/migrations/0025_dedupesettings_duplicatecandidates.py
 create mode 100644 hawc/apps/lit/templates/_duplicate_candidates.html
 create mode 100644 hawc/apps/lit/templates/lit/duplicate_candidates.html

diff --git a/hawc/apps/lit/constants.py b/hawc/apps/lit/constants.py
index 1c51472140..039980837e 100644
--- a/hawc/apps/lit/constants.py
+++ b/hawc/apps/lit/constants.py
@@ -28,6 +28,10 @@ class SearchType(models.TextChoices):
     SEARCH = "s", "Search"
     IMPORT = "i", "Import"
 
+class DuplicateResolution(models.IntegerChoices):
+    UNRESOLVED = 0, "Unresolved"
+    RESOLVED = 1, "Resolved" # TODO: change to "primary identified"
+    FALSE_POSITIVE = 2, "False positive"
 
 # generalized/adapted from https://www.crossref.org/blog/dois-and-matching-regular-expressions/
 DOI_EXACT = re.compile(r"^10\.\d{4,9}/[^\s]+$")
diff --git a/hawc/apps/lit/migrations/0025_dedupesettings_duplicatecandidates.py b/hawc/apps/lit/migrations/0025_dedupesettings_duplicatecandidates.py
new file mode 100644
index 0000000000..7e28051935
--- /dev/null
+++ b/hawc/apps/lit/migrations/0025_dedupesettings_duplicatecandidates.py
@@ -0,0 +1,84 @@
+# Generated by Django 5.1.4 on 2025-02-03 08:23
+
+import django.contrib.postgres.fields
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("assessment", "0047_alter_labeleditem_options"),
+        ("lit", "0024_workflows"),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="DedupeSettings",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+            ],
+        ),
+        migrations.CreateModel(
+            name="DuplicateCandidates",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "resolution",
+                    models.PositiveSmallIntegerField(
+                        choices=[
+                            (0, "Unresolved"),
+                            (1, "Resolved"),
+                            (2, "False positive"),
+                        ],
+                        default=0,
+                    ),
+                ),
+                (
+                    "candidates",
+                    django.contrib.postgres.fields.ArrayField(
+                        base_field=models.IntegerField(), size=None, unique=True
+                    ),
+                ),
+                ("primary", models.IntegerField(null=True)),
+                ("notes", models.TextField(blank=True)),
+                ("created", models.DateTimeField(auto_now_add=True)),
+                ("last_updated", models.DateTimeField(auto_now=True)),
+                (
+                    "assessment",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="duplicates",
+                        to="assessment.assessment",
+                    ),
+                ),
+                (
+                    "resolving_user",
+                    models.ForeignKey(
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="resolved_duplicates",
+                        to=settings.AUTH_USER_MODEL,
+                    ),
+                ),
+            ],
+        ),
+    ]
diff --git a/hawc/apps/lit/models.py b/hawc/apps/lit/models.py
index ed396ec210..3b82ab136e 100644
--- a/hawc/apps/lit/models.py
+++ b/hawc/apps/lit/models.py
@@ -1458,6 +1458,72 @@ def get_description(self) -> str:
         )
 
 
+# add parameters/select on literatureassessment model
+class DedupeSettings(models.Model):
+    # deduper for use in an assessment
+    # for first pass maybe we just have a global deduper, or static choices, so that we don't have to build this
+    assessment:"Assessment"
+    parameters:dict # list of parameters for deduplication? ie schema of dedupe modules to use?
+
+    def build_deduper(self):
+        # return deduper instance using self.parameters
+        return
+
+
+# SOFT DELETES
+
+class SortedArrayField(ArrayField):
+    pass
+
+class DuplicateCandidates(models.Model):
+    assessment = models.ForeignKey(
+        "assessment.Assessment", on_delete=models.CASCADE, related_name="duplicates"
+    )
+    resolution = models.PositiveSmallIntegerField(
+        choices=constants.DuplicateResolution,
+        default=constants.DuplicateResolution.UNRESOLVED
+    )
+    resolving_user = models.ForeignKey(HAWCUser, null=True, on_delete=models.SET_NULL, related_name="resolved_duplicates")
+    candidates = ArrayField(models.IntegerField(),unique=True)
+    primary = models.IntegerField(null=True)
+    notes = models.TextField(blank=True)
+    created = models.DateTimeField(auto_now_add=True)
+    last_updated = models.DateTimeField(auto_now=True)
+
+    def generate_unique_identifier(self):
+        return sorted(self.candidates)
+
+
+    def resolve(self,primaries:dict|list[dict]):
+        if primaries is None:
+            self.resolution = "False positive"
+        else:
+            self.resolution = "Duplicates detected"
+            # remove reference instances pointed to in self.data that are not primaries
+            # update reference instances in primaries with data?
+            self.result = primaries
+
+# where to put execute method? literatureassessment, manager for dupes model
+
+"""
+WORKFLOW
+
+User defines deduper for use in assessment
+User executes a session that uses a defined deduper
+Session stores list of identified candidate duplicate groups
+User resolves duplicates in a session; if group status != unresolved, it shows up on this page
+Perhaps a seperate session page of resolved groups? ie an "in progress" list view and a "done" list view
+Multiple resolutions at once? Or more like screen page in LLR where its do one, click for next (look at conflict resolution)
+Should this workflow do anything proactive? ie lets say a candidate group is identified false positive, is it a big deal if it shows up again if a user executes another session w/ same settings? (yes)
+Single user right? Not like conflict resolution? THIS IS CORRECT
+Do we want this workflow to also happen on import? That would look slightly different
+    Though maybe we could just have it happen automatically AFTER import, that way it would use the same workflow
+    If used on import, do we add "choose a deduper" option to created search? or maybe "default" attribute to deduper, whichever one is "default" is used?
+    Each assessment has undeletable "default" deduper, maybe add noop setting choice for deduper for people who don't want it running on imports?
+"""
+
+
+
 reversion.register(LiteratureAssessment)
 reversion.register(Search)
 reversion.register(ReferenceFilterTag)
diff --git a/hawc/apps/lit/templates/_duplicate_candidates.html b/hawc/apps/lit/templates/_duplicate_candidates.html
new file mode 100644
index 0000000000..fb563ed668
--- /dev/null
+++ b/hawc/apps/lit/templates/_duplicate_candidates.html
@@ -0,0 +1 @@
+<div>This is a stub for a duplicate candidate group resolution. (Duplicate candidates {{object.pk}})</div>
\ No newline at end of file
diff --git a/hawc/apps/lit/templates/lit/duplicate_candidates.html b/hawc/apps/lit/templates/lit/duplicate_candidates.html
new file mode 100644
index 0000000000..7c89df53d0
--- /dev/null
+++ b/hawc/apps/lit/templates/lit/duplicate_candidates.html
@@ -0,0 +1,14 @@
+{% extends 'assessment-rooted.html' %}
+
+{% load bs4 %}
+{% block content %}
+  <div class="d-flex">
+    <h2>Duplicate candidates</h2>
+  </div>
+  <ul class="list-group list-group-flush my-3">
+    {% for object in object_list %}
+        {% include 'lit/_duplicate_candidates.html' %}
+    {% endfor %}
+  </ul>
+  {% include "includes/paginator.html" with plural_object_name="duplicate groups" %}
+{% endblock content %}
diff --git a/hawc/apps/lit/urls.py b/hawc/apps/lit/urls.py
index 137e197ee4..f5d2fba513 100644
--- a/hawc/apps/lit/urls.py
+++ b/hawc/apps/lit/urls.py
@@ -163,4 +163,9 @@
         name="workflow-htmx",
     ),
     path("api/", include((router.urls, "api"))),
+    path(
+        "assessment/<int:pk>/duplicate-candidates/",
+        views.DuplicateCandidatesList.as_view(),
+        name="duplicate-candidates",
+    ),
 ]
diff --git a/hawc/apps/lit/views.py b/hawc/apps/lit/views.py
index 0eea38e364..7f3f081bc6 100644
--- a/hawc/apps/lit/views.py
+++ b/hawc/apps/lit/views.py
@@ -1250,3 +1250,20 @@ def venn_reference_list(self, request, *args, **kwargs):
             "qs": models.Reference.objects.assessment_qs(self.assessment.id).filter(id__in=ids)
         }
         return render(request, "lit/components/venn_reference_list.html", context=context)
+
+
+
+class DuplicateCandidatesList(BaseList):
+    parent_model = Assessment
+    model = models.DuplicateCandidates
+    template_name = "lit/duplicate_candidates.html"
+    breadcrumb_active_name = "Duplicate candidates"
+
+    def get_queryset(self):
+        return (
+            super().get_queryset().filter(assessment=self.assessment).filter(resolution=constants.DuplicateResolution.UNRESOLVED)
+        )
+
+    def get_context_data(self, **kwargs):
+        context = super().get_context_data(**kwargs)
+

From 27c0a25bbc80964c170d97895caa982e8e857621 Mon Sep 17 00:00:00 2001
From: Daniel Rabstejnek <rabstejnek@gmail.com>
Date: Thu, 6 Feb 2025 10:03:44 -0500
Subject: [PATCH 2/5] Add duplicate resolution API, views, and hidden flag

---
 hawc/apps/lit/api.py                          | 27 +++++++++-
 .../lit/migrations/0026_reference_hidden.py   | 18 +++++++
 hawc/apps/lit/models.py                       | 49 +++++++++++++++----
 .../lit/templates/_duplicate_candidates.html  |  1 -
 .../templates/lit/_duplicate_candidates.html  | 18 +++++++
 .../templates/lit/duplicate_candidates.html   |  3 ++
 .../templates/lit/duplicate_candidates_2.html | 28 +++++++++++
 hawc/apps/lit/urls.py                         | 11 +++++
 hawc/apps/lit/views.py                        | 27 +++++++++-
 9 files changed, 170 insertions(+), 12 deletions(-)
 create mode 100644 hawc/apps/lit/migrations/0026_reference_hidden.py
 delete mode 100644 hawc/apps/lit/templates/_duplicate_candidates.html
 create mode 100644 hawc/apps/lit/templates/lit/_duplicate_candidates.html
 create mode 100644 hawc/apps/lit/templates/lit/duplicate_candidates_2.html

diff --git a/hawc/apps/lit/api.py b/hawc/apps/lit/api.py
index c407254202..f0aa33cec0 100644
--- a/hawc/apps/lit/api.py
+++ b/hawc/apps/lit/api.py
@@ -18,7 +18,7 @@
 from ..assessment.constants import AssessmentViewSetPermissions
 from ..assessment.models import Assessment
 from ..common.api import OncePerMinuteThrottle, PaginationWithCount
-from ..common.helper import FlatExport, cacheable
+from ..common.helper import FlatExport, cacheable, tryParseInt
 from ..common.renderers import PandasRenderers
 from ..common.serializers import UnusedSerializer
 from ..common.views import create_object_log
@@ -426,3 +426,28 @@ def id_search(self, request, db_id: str, id: str):
             df=qs.global_df(),
             filename=f"global-reference-data-{id}",
         )
+
+
+
+
+class DuplicateViewSet(
+    BaseAssessmentViewSet,
+):
+    model = models.DuplicateCandidates
+    http_method_names = ["post"]
+
+    @action(
+        detail=True, methods=("post",), action_perms=AssessmentViewSetPermissions.CAN_EDIT_OBJECT
+    )
+    def resolve_duplicate(self, request, pk):
+        instance = self.get_object()
+        assessment = instance.assessment
+        if not assessment.user_can_edit_object(self.request.user):
+            raise PermissionDenied()
+        resolution = request.POST.get("resolution")
+        notes = request.POST.get("notes","")
+        if resolution == "none":
+            instance.resolve(resolution=constants.DuplicateResolution.FALSE_POSITIVE,notes=notes)
+        if (resolution:=tryParseInt(resolution)) is not None:
+            instance.resolve(resolution=constants.DuplicateResolution.RESOLVED,primary=resolution,notes=notes)
+        return Response({"status": "ok"})
diff --git a/hawc/apps/lit/migrations/0026_reference_hidden.py b/hawc/apps/lit/migrations/0026_reference_hidden.py
new file mode 100644
index 0000000000..a28c209ed9
--- /dev/null
+++ b/hawc/apps/lit/migrations/0026_reference_hidden.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.1.4 on 2025-02-05 17:43
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("lit", "0025_dedupesettings_duplicatecandidates"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="reference",
+            name="hidden",
+            field=models.BooleanField(default=False),
+        ),
+    ]
diff --git a/hawc/apps/lit/models.py b/hawc/apps/lit/models.py
index 3b82ab136e..1b1420bf0d 100644
--- a/hawc/apps/lit/models.py
+++ b/hawc/apps/lit/models.py
@@ -6,6 +6,7 @@
 from math import ceil
 from typing import Self
 from urllib import parse
+import random
 
 from celery import chain
 from celery.result import ResultBase
@@ -849,6 +850,7 @@ class Reference(models.Model):
         null=True,
         help_text="Used internally for determining when reference was " "originally added",
     )
+    hidden = models.BooleanField(default=False)
 
     BREADCRUMB_PARENT = "assessment"
 
@@ -1490,20 +1492,49 @@ class DuplicateCandidates(models.Model):
     created = models.DateTimeField(auto_now_add=True)
     last_updated = models.DateTimeField(auto_now=True)
 
+    def get_assessment(self):
+        return self.assessment
+
+    @classmethod
+    def foobar(cls,assessment):
+        references = assessment.references.values("pk","title")
+        candidate_groups = cls.random_execute(references)
+        cls.objects.bulk_create([cls(assessment=assessment,candidates=[ref["pk"] for ref in group]) for group in candidate_groups])
+
+    @classmethod
+    def random_execute(cls,references)->list[list[dict]]:
+        num_candidates = 2
+        if len(references)<num_candidates:
+            return []
+        num_groups = min(3,len(references)/num_candidates)
+        return [random.choices(references,k=num_candidates) for i in range(num_groups)]
+
     def generate_unique_identifier(self):
         return sorted(self.candidates)
+    
+    def _update_references(self):
+        # TODO also make primary not hidden? may be unnecessary
+        duplicate_ids = set(self.candidates)-{self.primary}
+        self.assessment.references.filter(pk__in=duplicate_ids).update(hidden=True)
+
+    def resolve(self,resolution:constants.DuplicateResolution,primary:int=None,notes:str=""):
+        if resolution == constants.DuplicateResolution.UNRESOLVED:
+            raise ValueError("Resolution must not be unresolved.")
+        if resolution == constants.DuplicateResolution.RESOLVED:
+            if primary is None:
+                raise ValueError("Primary must not be None if duplicate identified.")
+            if primary not in self.candidates:
+                raise ValueError("Primary must be a candidate.")
+            self.primary = primary
+            #self._update_references()
+        self.resolution = resolution
+        self.notes = notes
+        self.save()
 
+# where to put execute method? literatureassessment, manager for dupes model
 
-    def resolve(self,primaries:dict|list[dict]):
-        if primaries is None:
-            self.resolution = "False positive"
-        else:
-            self.resolution = "Duplicates detected"
-            # remove reference instances pointed to in self.data that are not primaries
-            # update reference instances in primaries with data?
-            self.result = primaries
 
-# where to put execute method? literatureassessment, manager for dupes model
+# DuplicateCandidateGroup
 
 """
 WORKFLOW
diff --git a/hawc/apps/lit/templates/_duplicate_candidates.html b/hawc/apps/lit/templates/_duplicate_candidates.html
deleted file mode 100644
index fb563ed668..0000000000
--- a/hawc/apps/lit/templates/_duplicate_candidates.html
+++ /dev/null
@@ -1 +0,0 @@
-<div>This is a stub for a duplicate candidate group resolution. (Duplicate candidates {{object.pk}})</div>
\ No newline at end of file
diff --git a/hawc/apps/lit/templates/lit/_duplicate_candidates.html b/hawc/apps/lit/templates/lit/_duplicate_candidates.html
new file mode 100644
index 0000000000..6694a25fef
--- /dev/null
+++ b/hawc/apps/lit/templates/lit/_duplicate_candidates.html
@@ -0,0 +1,18 @@
+<div hx-target="this" hx-swap="delete swap:1s">
+    <form hx-post="{% url 'lit:api:duplicate-resolve-duplicate' object.pk %}">
+        <div>
+        <label>
+            <input type="radio" id="none-{{object.pk}}" name="resolution" value="none" />
+            No duplicates
+        </label>
+        </div>
+        {% for candidate in object.candidates %}
+        <div>
+            <input type="radio" id="primary-{{object.pk}}-{{candidate}}" name="resolution" value="{{candidate}}" />
+            <div style="display:inline-block;">{{candidate}}</div>
+        </div>
+        {% endfor %}
+        <textarea name="notes" placeholder="Notes"></textarea>
+        <button type="submit">Resolve</button>
+    </form>
+</div>
\ No newline at end of file
diff --git a/hawc/apps/lit/templates/lit/duplicate_candidates.html b/hawc/apps/lit/templates/lit/duplicate_candidates.html
index 7c89df53d0..b78dc9e70d 100644
--- a/hawc/apps/lit/templates/lit/duplicate_candidates.html
+++ b/hawc/apps/lit/templates/lit/duplicate_candidates.html
@@ -4,6 +4,9 @@
 {% block content %}
   <div class="d-flex">
     <h2>Duplicate candidates</h2>
+    {% actions %}
+      <a class="dropdown-item" href="{% url 'lit:duplicate-task' assessment.pk %}">Run deduplication</a>
+    {% endactions %}
   </div>
   <ul class="list-group list-group-flush my-3">
     {% for object in object_list %}
diff --git a/hawc/apps/lit/templates/lit/duplicate_candidates_2.html b/hawc/apps/lit/templates/lit/duplicate_candidates_2.html
new file mode 100644
index 0000000000..1a3a6f6687
--- /dev/null
+++ b/hawc/apps/lit/templates/lit/duplicate_candidates_2.html
@@ -0,0 +1,28 @@
+{% extends 'assessment-rooted.html' %}
+
+{% load bs4 %}
+{% block content %}
+  <div class="d-flex">
+    <h2>Resolved duplicates</h2>
+  </div>
+  <ul class="list-group list-group-flush my-3">
+    {% for object in object_list %}
+        <div>
+          Group {{object.pk}}
+        </div>
+        <div>
+          {{object.resolution}}
+        </div>
+        <div>
+          {{object.candidates}}
+        </div>
+        <div>
+          {{object.primary}}
+        </div>
+        <div>
+          {{object.notes}}
+        </div>
+    {% endfor %}
+  </ul>
+  {% include "includes/paginator.html" with plural_object_name="duplicate groups" %}
+{% endblock content %}
diff --git a/hawc/apps/lit/urls.py b/hawc/apps/lit/urls.py
index f5d2fba513..0c913227be 100644
--- a/hawc/apps/lit/urls.py
+++ b/hawc/apps/lit/urls.py
@@ -8,6 +8,7 @@
 router.register(r"reference", api.ReferenceViewSet, basename="reference")
 router.register(r"search", api.SearchViewSet, basename="search")
 router.register(r"tags", api.ReferenceFilterTagViewSet, basename="tags")
+router.register(r"duplicate", api.DuplicateViewSet, basename="duplicate")
 
 app_name = "lit"
 urlpatterns = [
@@ -168,4 +169,14 @@
         views.DuplicateCandidatesList.as_view(),
         name="duplicate-candidates",
     ),
+    path(
+        "assessment/<int:pk>/duplicate-candidates2/",
+        views.DuplicateCandidatesList2.as_view(),
+        name="duplicate-candidates2",
+    ),
+    path(
+        "assessment/<int:pk>/duplicate-task/",
+        views.DuplicateTask.as_view(),
+        name="duplicate-task",
+    ),
 ]
diff --git a/hawc/apps/lit/views.py b/hawc/apps/lit/views.py
index 7f3f081bc6..638581b530 100644
--- a/hawc/apps/lit/views.py
+++ b/hawc/apps/lit/views.py
@@ -10,7 +10,7 @@
 from django.template import loader
 from django.urls import reverse, reverse_lazy
 from django.utils.decorators import method_decorator
-from django.views.generic import TemplateView
+from django.views.generic import TemplateView, View
 
 from ..assessment.constants import AssessmentViewPermissions
 from ..assessment.models import Assessment
@@ -27,6 +27,7 @@
     BaseUpdate,
     create_object_log,
     htmx_required,
+    MessageMixin
 )
 from ..udf.cache import TagCache
 from . import constants, filterset, forms, models
@@ -1266,4 +1267,28 @@ def get_queryset(self):
 
     def get_context_data(self, **kwargs):
         context = super().get_context_data(**kwargs)
+        return context
+
+class DuplicateCandidatesList2(BaseList):
+    parent_model = Assessment
+    model = models.DuplicateCandidates
+    template_name = "lit/duplicate_candidates_2.html"
+    breadcrumb_active_name = "Resolved duplicates"
+
+    def get_queryset(self):
+        return (
+            super().get_queryset().filter(assessment=self.assessment).exclude(resolution=constants.DuplicateResolution.UNRESOLVED)
+        )
 
+    def get_context_data(self, **kwargs):
+        context = super().get_context_data(**kwargs)
+        return context
+
+class DuplicateTask(MessageMixin, View):
+    success_message = "Deduplication requested."
+
+    def get(self, request, *args, **kwargs):
+        assessment = get_object_or_404(Assessment, pk=kwargs["pk"])
+        models.DuplicateCandidates.foobar(assessment)
+        self.send_message()
+        return HttpResponseRedirect(request.META.get('HTTP_REFERER', '/'))
\ No newline at end of file

From 2ac87e6f1a1232dde9bb25369da949dbb635aeb4 Mon Sep 17 00:00:00 2001
From: Daniel Rabstejnek <rabstejnek@gmail.com>
Date: Tue, 11 Feb 2025 09:04:08 -0500
Subject: [PATCH 3/5] Changed model fields for candidates/primary to relation
 based, cleanup & improvements

---
 hawc/apps/lit/api.py                          |  16 ++-
 hawc/apps/lit/constants.py                    |   4 +-
 ...0025_dedupesettings_duplicatecandidates.py |   1 -
 .../lit/migrations/0026_reference_hidden.py   |   1 -
 ...ategroup_delete_dedupesettings_and_more.py |  80 +++++++++++
 hawc/apps/lit/models.py                       | 125 +++++++++---------
 .../templates/lit/_duplicate_candidates.html  |  32 ++---
 .../templates/lit/duplicate_candidates.html   |   2 +-
 .../templates/lit/duplicate_candidates_2.html |  30 ++---
 hawc/apps/lit/views.py                        |  24 ++--
 10 files changed, 200 insertions(+), 115 deletions(-)
 create mode 100644 hawc/apps/lit/migrations/0027_duplicatecandidategroup_delete_dedupesettings_and_more.py

diff --git a/hawc/apps/lit/api.py b/hawc/apps/lit/api.py
index f0aa33cec0..ddec6cea15 100644
--- a/hawc/apps/lit/api.py
+++ b/hawc/apps/lit/api.py
@@ -428,12 +428,10 @@ def id_search(self, request, db_id: str, id: str):
         )
 
 
-
-
 class DuplicateViewSet(
     BaseAssessmentViewSet,
 ):
-    model = models.DuplicateCandidates
+    model = models.DuplicateCandidateGroup
     http_method_names = ["post"]
 
     @action(
@@ -445,9 +443,13 @@ def resolve_duplicate(self, request, pk):
         if not assessment.user_can_edit_object(self.request.user):
             raise PermissionDenied()
         resolution = request.POST.get("resolution")
-        notes = request.POST.get("notes","")
+        notes = request.POST.get("notes", "")
         if resolution == "none":
-            instance.resolve(resolution=constants.DuplicateResolution.FALSE_POSITIVE,notes=notes)
-        if (resolution:=tryParseInt(resolution)) is not None:
-            instance.resolve(resolution=constants.DuplicateResolution.RESOLVED,primary=resolution,notes=notes)
+            instance.resolve(resolution=constants.DuplicateResolution.FALSE_POSITIVE, notes=notes)
+        elif (primary := tryParseInt(resolution)) is not None:
+            instance.resolve(
+                resolution=constants.DuplicateResolution.RESOLVED, primary_id=primary, notes=notes
+            )
+        else:  # no/unknown choice submitted: reject instead of reporting success
+            return Response({"status": "invalid resolution"}, status=400)
         return Response({"status": "ok"})
diff --git a/hawc/apps/lit/constants.py b/hawc/apps/lit/constants.py
index 039980837e..21b2465435 100644
--- a/hawc/apps/lit/constants.py
+++ b/hawc/apps/lit/constants.py
@@ -28,11 +28,13 @@ class SearchType(models.TextChoices):
     SEARCH = "s", "Search"
     IMPORT = "i", "Import"
 
+
 class DuplicateResolution(models.IntegerChoices):
     UNRESOLVED = 0, "Unresolved"
-    RESOLVED = 1, "Resolved" # TODO: change to "primary identified"
+    RESOLVED = 1, "Resolved"  # TODO: change to "primary identified"
     FALSE_POSITIVE = 2, "False positive"
 
+
 # generalized/adapted from https://www.crossref.org/blog/dois-and-matching-regular-expressions/
 DOI_EXACT = re.compile(r"^10\.\d{4,9}/[^\s]+$")
 DOI_EXTRACT = re.compile(r"10\.\d{4,9}/[^\s]+")
diff --git a/hawc/apps/lit/migrations/0025_dedupesettings_duplicatecandidates.py b/hawc/apps/lit/migrations/0025_dedupesettings_duplicatecandidates.py
index 7e28051935..29322ed29a 100644
--- a/hawc/apps/lit/migrations/0025_dedupesettings_duplicatecandidates.py
+++ b/hawc/apps/lit/migrations/0025_dedupesettings_duplicatecandidates.py
@@ -7,7 +7,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("assessment", "0047_alter_labeleditem_options"),
         ("lit", "0024_workflows"),
diff --git a/hawc/apps/lit/migrations/0026_reference_hidden.py b/hawc/apps/lit/migrations/0026_reference_hidden.py
index a28c209ed9..91dddf36a1 100644
--- a/hawc/apps/lit/migrations/0026_reference_hidden.py
+++ b/hawc/apps/lit/migrations/0026_reference_hidden.py
@@ -4,7 +4,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("lit", "0025_dedupesettings_duplicatecandidates"),
     ]
diff --git a/hawc/apps/lit/migrations/0027_duplicatecandidategroup_delete_dedupesettings_and_more.py b/hawc/apps/lit/migrations/0027_duplicatecandidategroup_delete_dedupesettings_and_more.py
new file mode 100644
index 0000000000..6b0c400c7d
--- /dev/null
+++ b/hawc/apps/lit/migrations/0027_duplicatecandidategroup_delete_dedupesettings_and_more.py
@@ -0,0 +1,80 @@
+# Generated by Django 5.1.4 on 2025-02-10 12:18
+
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("assessment", "0047_alter_labeleditem_options"),
+        ("lit", "0026_reference_hidden"),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="DuplicateCandidateGroup",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "resolution",
+                    models.PositiveSmallIntegerField(
+                        choices=[
+                            (0, "Unresolved"),
+                            (1, "Resolved"),
+                            (2, "False positive"),
+                        ],
+                        default=0,
+                    ),
+                ),
+                ("notes", models.TextField(blank=True)),
+                ("created", models.DateTimeField(auto_now_add=True)),
+                ("last_updated", models.DateTimeField(auto_now=True)),
+                (
+                    "assessment",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="duplicates",
+                        to="assessment.assessment",
+                    ),
+                ),
+                (
+                    "candidates",
+                    models.ManyToManyField(related_name="duplicate_candidates", to="lit.reference"),
+                ),
+                (
+                    "primary",
+                    models.ForeignKey(
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="duplicate_primaries",
+                        to="lit.reference",
+                    ),
+                ),
+                (
+                    "resolving_user",
+                    models.ForeignKey(
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="resolved_duplicates",
+                        to=settings.AUTH_USER_MODEL,
+                    ),
+                ),
+            ],
+        ),
+        migrations.DeleteModel(
+            name="DedupeSettings",
+        ),
+        migrations.DeleteModel(
+            name="DuplicateCandidates",
+        ),
+    ]
diff --git a/hawc/apps/lit/models.py b/hawc/apps/lit/models.py
index 1b1420bf0d..5cd5534e9e 100644
--- a/hawc/apps/lit/models.py
+++ b/hawc/apps/lit/models.py
@@ -1,12 +1,12 @@
 import html
 import json
 import logging
+import random
 import re
 from copy import copy
 from math import ceil
 from typing import Self
 from urllib import parse
-import random
 
 from celery import chain
 from celery.result import ResultBase
@@ -1460,100 +1460,93 @@ def get_description(self) -> str:
         )
 
 
-# add parameters/select on literatureassessment model
-class DedupeSettings(models.Model):
-    # deduper for use in an assessment
-    # for first pass maybe we just have a global deduper, or static choices, so that we don't have to build this
-    assessment:"Assessment"
-    parameters:dict # list of parameters for deduplication? ie schema of dedupe modules to use?
-
-    def build_deduper(self):
-        # return deduper instance using self.parameters
-        return
-
-
-# SOFT DELETES
-
-class SortedArrayField(ArrayField):
-    pass
-
-class DuplicateCandidates(models.Model):
+class DuplicateCandidateGroup(models.Model):
     assessment = models.ForeignKey(
         "assessment.Assessment", on_delete=models.CASCADE, related_name="duplicates"
     )
     resolution = models.PositiveSmallIntegerField(
-        choices=constants.DuplicateResolution,
-        default=constants.DuplicateResolution.UNRESOLVED
+        choices=constants.DuplicateResolution, default=constants.DuplicateResolution.UNRESOLVED
+    )
+    resolving_user = models.ForeignKey(
+        HAWCUser, null=True, on_delete=models.SET_NULL, related_name="resolved_duplicates"
+    )
+    candidates = models.ManyToManyField(Reference, related_name="duplicate_candidates")
+    primary = models.ForeignKey(
+        Reference, null=True, on_delete=models.SET_NULL, related_name="duplicate_primaries"
     )
-    resolving_user = models.ForeignKey(HAWCUser, null=True, on_delete=models.SET_NULL, related_name="resolved_duplicates")
-    candidates = ArrayField(models.IntegerField(),unique=True)
-    primary = models.IntegerField(null=True)
     notes = models.TextField(blank=True)
     created = models.DateTimeField(auto_now_add=True)
     last_updated = models.DateTimeField(auto_now=True)
 
+    @property
+    def secondaries(self):
+        return self.candidates.exclude(pk=self.primary_id)
+
     def get_assessment(self):
         return self.assessment
 
     @classmethod
-    def foobar(cls,assessment):
-        references = assessment.references.values("pk","title")
-        candidate_groups = cls.random_execute(references)
-        cls.objects.bulk_create([cls(assessment=assessment,candidates=[ref["pk"] for ref in group]) for group in candidate_groups])
+    def validate_candidates(cls, candidates: list[int]):
+        qs = cls.objects.annotate(candidates_count=models.Count("candidates")).filter(
+            candidates_count=len(candidates)
+        )
+        for candidate in candidates:
+            qs = qs.filter(candidates=candidate)
+        return not qs.exists()
 
     @classmethod
-    def random_execute(cls,references)->list[list[dict]]:
+    def find_duplicate_candidate_groups(cls, references) -> list[list[dict]]:
         num_candidates = 2
-        if len(references)<num_candidates:
+        if len(references) < num_candidates:
             return []
-        num_groups = min(3,len(references)/num_candidates)
-        return [random.choices(references,k=num_candidates) for i in range(num_groups)]
+        num_groups = min(3, len(references) / num_candidates)
+        return [random.choices(references, k=num_candidates) for i in range(num_groups)]
+
+    @classmethod
+    def create_duplicate_candidate_groups(cls, assessment) -> list["DuplicateCandidateGroup"]:
+        references = assessment.references.values("pk", "title")
+        candidate_groups = cls.find_duplicate_candidate_groups(references)
+        candidate_groups = [
+            group
+            for group in candidate_groups
+            if cls.validate_candidates([ref["pk"] for ref in group])
+        ]
+        objs = cls.objects.bulk_create([cls(assessment=assessment) for group in candidate_groups])
+        m2m_objs = cls.candidates.through.objects.bulk_create(
+            [
+                cls.candidates.through(duplicatecandidategroup_id=obj.pk, reference_id=ref["pk"])
+                for obj, group in zip(objs, candidate_groups, strict=False)
+                for ref in group
+            ]
+        )
 
-    def generate_unique_identifier(self):
-        return sorted(self.candidates)
-    
     def _update_references(self):
-        # TODO also make primary not hidden? may be unnecessary
-        duplicate_ids = set(self.candidates)-{self.primary}
+        duplicate_ids = self.secondaries.values_list("pk", flat=True)
         self.assessment.references.filter(pk__in=duplicate_ids).update(hidden=True)
-
-    def resolve(self,resolution:constants.DuplicateResolution,primary:int=None,notes:str=""):
+        # if a "hidden" reference was selected as primary, unhide it
+        if self.primary.hidden:
+            self.primary.hidden = False
+            self.primary.save()
+
+    def resolve(
+        self,
+        resolution: constants.DuplicateResolution,
+        primary_id: int | None = None,
+        notes: str = "",
+    ):
         if resolution == constants.DuplicateResolution.UNRESOLVED:
             raise ValueError("Resolution must not be unresolved.")
         if resolution == constants.DuplicateResolution.RESOLVED:
-            if primary is None:
+            if primary_id is None:
                 raise ValueError("Primary must not be None if duplicate identified.")
-            if primary not in self.candidates:
+            if primary_id not in self.candidates.values_list("pk", flat=True):
                 raise ValueError("Primary must be a candidate.")
-            self.primary = primary
-            #self._update_references()
+            self.primary_id = primary_id
+            self._update_references()
         self.resolution = resolution
         self.notes = notes
         self.save()
 
-# where to put execute method? literatureassessment, manager for dupes model
-
-
-# DuplicateCandidateGroup
-
-"""
-WORKFLOW
-
-User defines deduper for use in assessment
-User executes a session that uses a defined deduper
-Session stores list of identified candidate duplicate groups
-User resolves duplicates in a session; if group status != unresolved, it shows up on this page
-Perhaps a seperate session page of resolved groups? ie an "in progress" list view and a "done" list view
-Multiple resolutions at once? Or more like screen page in LLR where its do one, click for next (look at conflict resolution)
-Should this workflow do anything proactive? ie lets say a candidate group is identified false positive, is it a big deal if it shows up again if a user executes another session w/ same settings? (yes)
-Single user right? Not like conflict resolution? THIS IS CORRECT
-Do we want this workflow to also happen on import? That would look slightly different
-    Though maybe we could just have it happen automatically AFTER import, that way it would use the same workflow
-    If used on import, do we add "choose a deduper" option to created search? or maybe "default" attribute to deduper, whichever one is "default" is used?
-    Each assessment has undeletable "default" deduper, maybe add noop setting choice for deduper for people who don't want it running on imports?
-"""
-
-
 
 reversion.register(LiteratureAssessment)
 reversion.register(Search)
diff --git a/hawc/apps/lit/templates/lit/_duplicate_candidates.html b/hawc/apps/lit/templates/lit/_duplicate_candidates.html
index 6694a25fef..c0ac4e33a1 100644
--- a/hawc/apps/lit/templates/lit/_duplicate_candidates.html
+++ b/hawc/apps/lit/templates/lit/_duplicate_candidates.html
@@ -1,18 +1,20 @@
 <div hx-target="this" hx-swap="delete swap:1s">
-    <form hx-post="{% url 'lit:api:duplicate-resolve-duplicate' object.pk %}">
-        <div>
-        <label>
-            <input type="radio" id="none-{{object.pk}}" name="resolution" value="none" />
-            No duplicates
-        </label>
+  <form hx-post="{% url 'lit:api:duplicate-resolve-duplicate' object.pk %}">
+    <div>
+      <label>
+        <input type="radio" id="none-{{object.pk}}" name="resolution" value="none" />
+        No duplicates
+      </label>
+    </div>
+    {% for candidate in object.candidates.all %}
+      <div>
+        <input type="radio" id="primary-{{object.pk}}-{{candidate.pk}}" name="resolution" value="{{candidate.pk}}" />
+        <div style="display:inline-block; vertical-align: top;">
+          {% include 'lit/_reference_with_tags.html' with ref=candidate %}
         </div>
-        {% for candidate in object.candidates %}
-        <div>
-            <input type="radio" id="primary-{{object.pk}}-{{candidate}}" name="resolution" value="{{candidate}}" />
-            <div style="display:inline-block;">{{candidate}}</div>
-        </div>
-        {% endfor %}
-        <textarea name="notes" placeholder="Notes"></textarea>
-        <button type="submit">Resolve</button>
-    </form>
+      </div>
+    {% endfor %}
+    <textarea name="notes" placeholder="Notes"></textarea>
+    <button type="submit">Resolve</button>
+  </form>
 </div>
\ No newline at end of file
diff --git a/hawc/apps/lit/templates/lit/duplicate_candidates.html b/hawc/apps/lit/templates/lit/duplicate_candidates.html
index b78dc9e70d..2c67b17fdb 100644
--- a/hawc/apps/lit/templates/lit/duplicate_candidates.html
+++ b/hawc/apps/lit/templates/lit/duplicate_candidates.html
@@ -10,7 +10,7 @@ <h2>Duplicate candidates</h2>
   </div>
   <ul class="list-group list-group-flush my-3">
     {% for object in object_list %}
-        {% include 'lit/_duplicate_candidates.html' %}
+      {% include 'lit/_duplicate_candidates.html' %}
     {% endfor %}
   </ul>
   {% include "includes/paginator.html" with plural_object_name="duplicate groups" %}
diff --git a/hawc/apps/lit/templates/lit/duplicate_candidates_2.html b/hawc/apps/lit/templates/lit/duplicate_candidates_2.html
index 1a3a6f6687..1732684076 100644
--- a/hawc/apps/lit/templates/lit/duplicate_candidates_2.html
+++ b/hawc/apps/lit/templates/lit/duplicate_candidates_2.html
@@ -7,21 +7,21 @@ <h2>Resolved duplicates</h2>
   </div>
   <ul class="list-group list-group-flush my-3">
     {% for object in object_list %}
-        <div>
-          Group {{object.pk}}
-        </div>
-        <div>
-          {{object.resolution}}
-        </div>
-        <div>
-          {{object.candidates}}
-        </div>
-        <div>
-          {{object.primary}}
-        </div>
-        <div>
-          {{object.notes}}
-        </div>
+      <div>
+        Group {{object.pk}}
+      </div>
+      <div>
+        {{object.resolution}}
+      </div>
+      <div>
+        {{object.candidates}}
+      </div>
+      <div>
+        {{object.primary}}
+      </div>
+      <div>
+        {{object.notes}}
+      </div>
     {% endfor %}
   </ul>
   {% include "includes/paginator.html" with plural_object_name="duplicate groups" %}
diff --git a/hawc/apps/lit/views.py b/hawc/apps/lit/views.py
index 638581b530..97986d8861 100644
--- a/hawc/apps/lit/views.py
+++ b/hawc/apps/lit/views.py
@@ -25,9 +25,9 @@
     BaseFilterList,
     BaseList,
     BaseUpdate,
+    MessageMixin,
     create_object_log,
     htmx_required,
-    MessageMixin
 )
 from ..udf.cache import TagCache
 from . import constants, filterset, forms, models
@@ -1253,42 +1253,50 @@ def venn_reference_list(self, request, *args, **kwargs):
         return render(request, "lit/components/venn_reference_list.html", context=context)
 
 
-
 class DuplicateCandidatesList(BaseList):
     parent_model = Assessment
-    model = models.DuplicateCandidates
+    model = models.DuplicateCandidateGroup
     template_name = "lit/duplicate_candidates.html"
     breadcrumb_active_name = "Duplicate candidates"
 
     def get_queryset(self):
         return (
-            super().get_queryset().filter(assessment=self.assessment).filter(resolution=constants.DuplicateResolution.UNRESOLVED)
+            super()
+            .get_queryset()
+            .filter(assessment=self.assessment)
+            .filter(resolution=constants.DuplicateResolution.UNRESOLVED)
+            .prefetch_related("candidates", "candidates__identifiers", "candidates__tags")
         )
 
     def get_context_data(self, **kwargs):
         context = super().get_context_data(**kwargs)
         return context
 
+
 class DuplicateCandidatesList2(BaseList):
     parent_model = Assessment
-    model = models.DuplicateCandidates
+    model = models.DuplicateCandidateGroup
     template_name = "lit/duplicate_candidates_2.html"
     breadcrumb_active_name = "Resolved duplicates"
 
     def get_queryset(self):
         return (
-            super().get_queryset().filter(assessment=self.assessment).exclude(resolution=constants.DuplicateResolution.UNRESOLVED)
+            super()
+            .get_queryset()
+            .filter(assessment=self.assessment)
+            .exclude(resolution=constants.DuplicateResolution.UNRESOLVED)
         )
 
     def get_context_data(self, **kwargs):
         context = super().get_context_data(**kwargs)
         return context
 
+
 class DuplicateTask(MessageMixin, View):
     success_message = "Deduplication requested."
 
     def get(self, request, *args, **kwargs):
         assessment = get_object_or_404(Assessment, pk=kwargs["pk"])
-        models.DuplicateCandidates.foobar(assessment)
+        models.DuplicateCandidateGroup.create_duplicate_candidate_groups(assessment)
         self.send_message()
-        return HttpResponseRedirect(request.META.get('HTTP_REFERER', '/'))
\ No newline at end of file
+        return HttpResponseRedirect(request.META.get("HTTP_REFERER", "/"))

From 55817a7179606d373a7a809a046c748d497d0dbc Mon Sep 17 00:00:00 2001
From: Daniel Rabstejnek <rabstejnek@gmail.com>
Date: Tue, 11 Feb 2025 14:33:12 -0500
Subject: [PATCH 4/5] Refactor names, create task, cleanup duplicate resolution
 ui

---
 hawc/apps/lit/models.py                       | 20 ++--------
 hawc/apps/lit/tasks.py                        | 26 ++++++++++++
 .../templates/lit/_duplicate_candidates.html  | 40 +++++++++++--------
 .../templates/lit/duplicate_candidates.html   | 17 --------
 .../templates/lit/duplicate_resolution.html   | 21 ++++++++++
 hawc/apps/lit/templates/lit/overview.html     |  1 +
 ...idates_2.html => resolved_duplicates.html} |  3 ++
 hawc/apps/lit/urls.py                         | 18 ++++-----
 hawc/apps/lit/views.py                        | 29 +++++++++-----
 9 files changed, 106 insertions(+), 69 deletions(-)
 delete mode 100644 hawc/apps/lit/templates/lit/duplicate_candidates.html
 create mode 100644 hawc/apps/lit/templates/lit/duplicate_resolution.html
 rename hawc/apps/lit/templates/lit/{duplicate_candidates_2.html => resolved_duplicates.html} (80%)

diff --git a/hawc/apps/lit/models.py b/hawc/apps/lit/models.py
index 5cd5534e9e..f8f030e362 100644
--- a/hawc/apps/lit/models.py
+++ b/hawc/apps/lit/models.py
@@ -1500,25 +1500,11 @@ def find_duplicate_candidate_groups(cls, references) -> list[list[dict]]:
         if len(references) < num_candidates:
             return []
         num_groups = min(3, len(references) / num_candidates)
-        return [random.choices(references, k=num_candidates) for i in range(num_groups)]
+        return [random.choices(references, k=num_candidates) for i in range(num_groups)]  # noqa: S311
 
     @classmethod
-    def create_duplicate_candidate_groups(cls, assessment) -> list["DuplicateCandidateGroup"]:
-        references = assessment.references.values("pk", "title")
-        candidate_groups = cls.find_duplicate_candidate_groups(references)
-        candidate_groups = [
-            group
-            for group in candidate_groups
-            if cls.validate_candidates([ref["pk"] for ref in group])
-        ]
-        objs = cls.objects.bulk_create([cls(assessment=assessment) for group in candidate_groups])
-        m2m_objs = cls.candidates.through.objects.bulk_create(
-            [
-                cls.candidates.through(duplicatecandidategroup_id=obj.pk, reference_id=ref["pk"])
-                for obj, group in zip(objs, candidate_groups, strict=False)
-                for ref in group
-            ]
-        )
+    def create_duplicate_candidate_groups(cls, assessment_id: int):
+        tasks.create_duplicate_candidate_groups.delay(assessment_id)
 
     def _update_references(self):
         duplicate_ids = self.secondaries.values_list("pk", flat=True)
diff --git a/hawc/apps/lit/tasks.py b/hawc/apps/lit/tasks.py
index 3304a67cac..37535e0d7b 100644
--- a/hawc/apps/lit/tasks.py
+++ b/hawc/apps/lit/tasks.py
@@ -142,3 +142,29 @@ def fix_pubmed_without_content():
     logger.info(f"Attempting to update pubmed content for {num_ids} identifiers")
     if num_ids > 0:
         Identifiers.update_pubmed_content(ids)
+
+
+@shared_task
+def create_duplicate_candidate_groups(assessment_id: int):
+    DuplicateCandidateGroup = apps.get_model("lit", "DuplicateCandidateGroup")
+    assessment = apps.get_model("assessment", "Assessment").objects.get(pk=assessment_id)
+    references = assessment.references.values("pk", "title")
+    candidate_groups = DuplicateCandidateGroup.find_duplicate_candidate_groups(references)
+    candidate_groups = [
+        group
+        for group in candidate_groups
+        if DuplicateCandidateGroup.validate_candidates([ref["pk"] for ref in group])
+    ]
+    with transaction.atomic():
+        objs = DuplicateCandidateGroup.objects.bulk_create(
+            [DuplicateCandidateGroup(assessment=assessment) for group in candidate_groups]
+        )
+        DuplicateCandidateGroup.candidates.through.objects.bulk_create(
+            [
+                DuplicateCandidateGroup.candidates.through(
+                    duplicatecandidategroup_id=obj.pk, reference_id=ref["pk"]
+                )
+                for obj, group in zip(objs, candidate_groups, strict=False)
+                for ref in group
+            ]
+        )
diff --git a/hawc/apps/lit/templates/lit/_duplicate_candidates.html b/hawc/apps/lit/templates/lit/_duplicate_candidates.html
index c0ac4e33a1..f85177de08 100644
--- a/hawc/apps/lit/templates/lit/_duplicate_candidates.html
+++ b/hawc/apps/lit/templates/lit/_duplicate_candidates.html
@@ -1,20 +1,26 @@
 <div hx-target="this" hx-swap="delete swap:1s">
-  <form hx-post="{% url 'lit:api:duplicate-resolve-duplicate' object.pk %}">
-    <div>
-      <label>
-        <input type="radio" id="none-{{object.pk}}" name="resolution" value="none" />
-        No duplicates
-      </label>
-    </div>
-    {% for candidate in object.candidates.all %}
-      <div>
-        <input type="radio" id="primary-{{object.pk}}-{{candidate.pk}}" name="resolution" value="{{candidate.pk}}" />
-        <div style="display:inline-block; vertical-align: top;">
-          {% include 'lit/_reference_with_tags.html' with ref=candidate %}
+  <div class="carddd">
+    <div class="card-bodydd">
+      <form hx-post="{% url 'lit:api:duplicate-resolve-duplicate' object.pk %}">
+        <div class="form-group">
+          <input type="radio" id="none-{{object.pk}}" name="resolution" value="none" />
+          <label id="none-{{object.pk}}">
+            No duplicates
+          </label>
+        </div>
+        {% for candidate in object.candidates.all %}
+          <div class="form-group">
+            <input type="radio" id="primary-{{object.pk}}-{{candidate.pk}}" name="resolution" value="{{candidate.pk}}" />
+            <div class="d-inline-block align-top">
+              {% include 'lit/_reference_with_tags.html' with ref=candidate %}
+            </div>
+          </div>
+        {% endfor %}
+        <div class="form-group">
+          <textarea class="form-control" name="notes" placeholder="Notes"></textarea>
         </div>
-      </div>
-    {% endfor %}
-    <textarea name="notes" placeholder="Notes"></textarea>
-    <button type="submit">Resolve</button>
-  </form>
+        <button type="submit" class="btn btn-primary">Resolve</button>
+      </form>
+    </div>
+  </div>
 </div>
\ No newline at end of file
diff --git a/hawc/apps/lit/templates/lit/duplicate_candidates.html b/hawc/apps/lit/templates/lit/duplicate_candidates.html
deleted file mode 100644
index 2c67b17fdb..0000000000
--- a/hawc/apps/lit/templates/lit/duplicate_candidates.html
+++ /dev/null
@@ -1,17 +0,0 @@
-{% extends 'assessment-rooted.html' %}
-
-{% load bs4 %}
-{% block content %}
-  <div class="d-flex">
-    <h2>Duplicate candidates</h2>
-    {% actions %}
-      <a class="dropdown-item" href="{% url 'lit:duplicate-task' assessment.pk %}">Run deduplication</a>
-    {% endactions %}
-  </div>
-  <ul class="list-group list-group-flush my-3">
-    {% for object in object_list %}
-      {% include 'lit/_duplicate_candidates.html' %}
-    {% endfor %}
-  </ul>
-  {% include "includes/paginator.html" with plural_object_name="duplicate groups" %}
-{% endblock content %}
diff --git a/hawc/apps/lit/templates/lit/duplicate_resolution.html b/hawc/apps/lit/templates/lit/duplicate_resolution.html
new file mode 100644
index 0000000000..0b49227202
--- /dev/null
+++ b/hawc/apps/lit/templates/lit/duplicate_resolution.html
@@ -0,0 +1,21 @@
+{% extends 'assessment-rooted.html' %}
+
+{% load bs4 %}
+{% block content %}
+  <div class="d-flex">
+    <h2>Duplicate candidates</h2>
+    {% actions %}
+      <a class="dropdown-item" href="{% url 'lit:resolved-duplicates' assessment.pk %}">Resolved duplicates</a>
+      <a class="dropdown-item" href="{% url 'lit:identify-duplicates' assessment.pk %}">Identify duplicates</a>
+    {% endactions %}
+  </div>
+  <ul class="list-group list-group-flush my-3">
+    {% for object in object_list %}
+      <li class="list-group-item conflict-reference-li {% if forloop.first %} pb-4 {% else %} py-4 {% endif %}"
+          hx-target="this" hx-swap="delete swap:1s">
+        {% include 'lit/_duplicate_candidates.html' %}
+      </li>
+    {% endfor %}
+  </ul>
+  {% include "includes/paginator.html" with plural_object_name="duplicate groups" %}
+{% endblock content %}
diff --git a/hawc/apps/lit/templates/lit/overview.html b/hawc/apps/lit/templates/lit/overview.html
index 57a7bcbb31..37ba401033 100644
--- a/hawc/apps/lit/templates/lit/overview.html
+++ b/hawc/apps/lit/templates/lit/overview.html
@@ -27,6 +27,7 @@ <h2>Literature Review</h2>
         <a class="dropdown-item" href="{% url 'lit:tag' assessment.pk %}?search={{manual_import.pk}}">Tag manually added references</a>
         <a class="dropdown-item" href="{% url 'lit:tag' assessment.pk %}?untagged=on">Tag untagged references</a>
         <a class="dropdown-item" href="{% url 'lit:ref_upload' assessment.pk %}">Upload full text URLs</a>
+        <a class="dropdown-item" href="{% url 'lit:duplicate-resolution' assessment.pk %}">Duplicate resolution</a>
         <div class="dropdown-divider"></div>
         <span class="dropdown-header">Exports</span>
         <a class="dropdown-item" href="{% url 'lit:api:assessment-reference-export' assessment.pk %}?format=xlsx">Download all references</a>
diff --git a/hawc/apps/lit/templates/lit/duplicate_candidates_2.html b/hawc/apps/lit/templates/lit/resolved_duplicates.html
similarity index 80%
rename from hawc/apps/lit/templates/lit/duplicate_candidates_2.html
rename to hawc/apps/lit/templates/lit/resolved_duplicates.html
index 1732684076..2a833bae9e 100644
--- a/hawc/apps/lit/templates/lit/duplicate_candidates_2.html
+++ b/hawc/apps/lit/templates/lit/resolved_duplicates.html
@@ -4,6 +4,9 @@
 {% block content %}
   <div class="d-flex">
     <h2>Resolved duplicates</h2>
+    {% actions %}
+      <a class="dropdown-item" href="{% url 'lit:duplicate-resolution' assessment.pk %}">Duplicate resolution</a>
+    {% endactions %}
   </div>
   <ul class="list-group list-group-flush my-3">
     {% for object in object_list %}
diff --git a/hawc/apps/lit/urls.py b/hawc/apps/lit/urls.py
index 0c913227be..560e926a7c 100644
--- a/hawc/apps/lit/urls.py
+++ b/hawc/apps/lit/urls.py
@@ -165,18 +165,18 @@
     ),
     path("api/", include((router.urls, "api"))),
     path(
-        "assessment/<int:pk>/duplicate-candidates/",
-        views.DuplicateCandidatesList.as_view(),
-        name="duplicate-candidates",
+        "assessment/<int:pk>/duplicate-resolution/",
+        views.DuplicateResolution.as_view(),
+        name="duplicate-resolution",
     ),
     path(
-        "assessment/<int:pk>/duplicate-candidates2/",
-        views.DuplicateCandidatesList2.as_view(),
-        name="duplicate-candidates2",
+        "assessment/<int:pk>/resolved-duplicates/",
+        views.ResolvedDuplicates.as_view(),
+        name="resolved-duplicates",
     ),
     path(
-        "assessment/<int:pk>/duplicate-task/",
-        views.DuplicateTask.as_view(),
-        name="duplicate-task",
+        "assessment/<int:pk>/identify-duplicates/",
+        views.IdentifyDuplicates.as_view(),
+        name="identify-duplicates",
     ),
 ]
diff --git a/hawc/apps/lit/views.py b/hawc/apps/lit/views.py
index 97986d8861..d18e9a4edc 100644
--- a/hawc/apps/lit/views.py
+++ b/hawc/apps/lit/views.py
@@ -1253,11 +1253,13 @@ def venn_reference_list(self, request, *args, **kwargs):
         return render(request, "lit/components/venn_reference_list.html", context=context)
 
 
-class DuplicateCandidatesList(BaseList):
+class DuplicateResolution(BaseList):
     parent_model = Assessment
     model = models.DuplicateCandidateGroup
-    template_name = "lit/duplicate_candidates.html"
-    breadcrumb_active_name = "Duplicate candidates"
+    template_name = "lit/duplicate_resolution.html"
+    breadcrumb_active_name = "Duplicate resolution"
+
+    paginate_by = 5
 
     def get_queryset(self):
         return (
@@ -1270,13 +1272,16 @@ def get_queryset(self):
 
     def get_context_data(self, **kwargs):
         context = super().get_context_data(**kwargs)
+        context["breadcrumbs"] = lit_overview_crumbs(
+            self.request.user, self.assessment, "Duplicate resolution"
+        )
         return context
 
 
-class DuplicateCandidatesList2(BaseList):
+class ResolvedDuplicates(BaseList):
     parent_model = Assessment
     model = models.DuplicateCandidateGroup
-    template_name = "lit/duplicate_candidates_2.html"
+    template_name = "lit/resolved_duplicates.html"
     breadcrumb_active_name = "Resolved duplicates"
 
     def get_queryset(self):
@@ -1289,14 +1294,20 @@ def get_queryset(self):
 
     def get_context_data(self, **kwargs):
         context = super().get_context_data(**kwargs)
+        context["breadcrumbs"] = lit_overview_crumbs(
+            self.request.user, self.assessment, "Resolved duplicates"
+        )
         return context
 
 
-class DuplicateTask(MessageMixin, View):
-    success_message = "Deduplication requested."
+class IdentifyDuplicates(MessageMixin, View):
+    success_message = "Duplicate identification requested."
 
     def get(self, request, *args, **kwargs):
         assessment = get_object_or_404(Assessment, pk=kwargs["pk"])
-        models.DuplicateCandidateGroup.create_duplicate_candidate_groups(assessment)
+        if not assessment.user_is_team_member_or_higher(request.user):
+            raise PermissionDenied()
+        url = reverse("lit:duplicate-resolution", args=(assessment.pk,))
+        models.DuplicateCandidateGroup.create_duplicate_candidate_groups(assessment.pk)
         self.send_message()
-        return HttpResponseRedirect(request.META.get("HTTP_REFERER", "/"))
+        return HttpResponseRedirect(url)

From fc55194ac1328bc1cd9adcf0883a071f58eae9d4 Mon Sep 17 00:00:00 2001
From: Daniel Rabstejnek <rabstejnek@gmail.com>
Date: Tue, 11 Feb 2025 14:58:05 -0500
Subject: [PATCH 5/5] Updated permissions

---
 hawc/apps/lit/api.py                          |  9 +++++--
 hawc/apps/lit/models.py                       |  2 ++
 .../templates/lit/_duplicate_candidates.html  | 26 -------------------
 .../templates/lit/duplicate_resolution.html   | 25 +++++++++++++++++-
 hawc/apps/lit/templates/lit/overview.html     |  6 ++++-
 .../templates/lit/resolved_duplicates.html    |  8 +++---
 hawc/apps/lit/views.py                        |  4 ++-
 7 files changed, 46 insertions(+), 34 deletions(-)
 delete mode 100644 hawc/apps/lit/templates/lit/_duplicate_candidates.html

diff --git a/hawc/apps/lit/api.py b/hawc/apps/lit/api.py
index ddec6cea15..18ef4ed3ed 100644
--- a/hawc/apps/lit/api.py
+++ b/hawc/apps/lit/api.py
@@ -440,15 +440,20 @@ class DuplicateViewSet(
     def resolve_duplicate(self, request, pk):
         instance = self.get_object()
         assessment = instance.assessment
-        if not assessment.user_can_edit_object(self.request.user):
+        if not assessment.user_can_edit_object(request.user):
             raise PermissionDenied()
         resolution = request.POST.get("resolution")
         notes = request.POST.get("notes", "")
         if resolution == "none":
-            instance.resolve(resolution=constants.DuplicateResolution.FALSE_POSITIVE, notes=notes)
+            instance.resolve(
+                resolution=constants.DuplicateResolution.FALSE_POSITIVE,
+                resolving_user=request.user,
+                notes=notes,
+            )
         if (resolution := tryParseInt(resolution)) is not None:
             instance.resolve(
                 resolution=constants.DuplicateResolution.RESOLVED,
+                resolving_user=request.user,
                 primary_id=resolution,
                 notes=notes,
             )
diff --git a/hawc/apps/lit/models.py b/hawc/apps/lit/models.py
index f8f030e362..d21c36f67f 100644
--- a/hawc/apps/lit/models.py
+++ b/hawc/apps/lit/models.py
@@ -1517,6 +1517,7 @@ def _update_references(self):
     def resolve(
         self,
         resolution: constants.DuplicateResolution,
+        resolving_user: HAWCUser,
         primary_id: int | None = None,
         notes: str = "",
     ):
@@ -1530,6 +1531,7 @@ def resolve(
             self.primary_id = primary_id
             self._update_references()
         self.resolution = resolution
+        self.resolving_user = resolving_user
         self.notes = notes
         self.save()
 
diff --git a/hawc/apps/lit/templates/lit/_duplicate_candidates.html b/hawc/apps/lit/templates/lit/_duplicate_candidates.html
deleted file mode 100644
index f85177de08..0000000000
--- a/hawc/apps/lit/templates/lit/_duplicate_candidates.html
+++ /dev/null
@@ -1,26 +0,0 @@
-<div hx-target="this" hx-swap="delete swap:1s">
-  <div class="carddd">
-    <div class="card-bodydd">
-      <form hx-post="{% url 'lit:api:duplicate-resolve-duplicate' object.pk %}">
-        <div class="form-group">
-          <input type="radio" id="none-{{object.pk}}" name="resolution" value="none" />
-          <label id="none-{{object.pk}}">
-            No duplicates
-          </label>
-        </div>
-        {% for candidate in object.candidates.all %}
-          <div class="form-group">
-            <input type="radio" id="primary-{{object.pk}}-{{candidate.pk}}" name="resolution" value="{{candidate.pk}}" />
-            <div class="d-inline-block align-top">
-              {% include 'lit/_reference_with_tags.html' with ref=candidate %}
-            </div>
-          </div>
-        {% endfor %}
-        <div class="form-group">
-          <textarea class="form-control" name="notes" placeholder="Notes"></textarea>
-        </div>
-        <button type="submit" class="btn btn-primary">Resolve</button>
-      </form>
-    </div>
-  </div>
-</div>
\ No newline at end of file
diff --git a/hawc/apps/lit/templates/lit/duplicate_resolution.html b/hawc/apps/lit/templates/lit/duplicate_resolution.html
index 0b49227202..e2b6e0c7d7 100644
--- a/hawc/apps/lit/templates/lit/duplicate_resolution.html
+++ b/hawc/apps/lit/templates/lit/duplicate_resolution.html
@@ -13,7 +13,30 @@ <h2>Duplicate candidates</h2>
     {% for object in object_list %}
       <li class="list-group-item conflict-reference-li {% if forloop.first %} pb-4 {% else %} py-4 {% endif %}"
           hx-target="this" hx-swap="delete swap:1s">
-        {% include 'lit/_duplicate_candidates.html' %}
+        <div class="card">
+          <div class="card-body">
+            <form hx-post="{% url 'lit:api:duplicate-resolve-duplicate' object.pk %}">
+              <div class="form-group">
+                <input type="radio" id="none-{{object.pk}}" name="resolution" value="none" />
+                <label for="none-{{object.pk}}">
+                  No duplicates
+                </label>
+              </div>
+              {% for candidate in object.candidates.all %}
+                <div class="form-group">
+                  <input type="radio" id="primary-{{object.pk}}-{{candidate.pk}}" name="resolution" value="{{candidate.pk}}" />
+                  <div class="d-inline-block align-top">
+                    {% include 'lit/_reference_with_tags.html' with ref=candidate %}
+                  </div>
+                </div>
+              {% endfor %}
+              <div class="form-group">
+                <textarea class="form-control" name="notes" placeholder="Notes"></textarea>
+              </div>
+              <button type="submit" class="btn btn-primary">Resolve</button>
+            </form>
+          </div>
+        </div>
       </li>
     {% endfor %}
   </ul>
diff --git a/hawc/apps/lit/templates/lit/overview.html b/hawc/apps/lit/templates/lit/overview.html
index 37ba401033..d83f3c885f 100644
--- a/hawc/apps/lit/templates/lit/overview.html
+++ b/hawc/apps/lit/templates/lit/overview.html
@@ -27,7 +27,11 @@ <h2>Literature Review</h2>
         <a class="dropdown-item" href="{% url 'lit:tag' assessment.pk %}?search={{manual_import.pk}}">Tag manually added references</a>
         <a class="dropdown-item" href="{% url 'lit:tag' assessment.pk %}?untagged=on">Tag untagged references</a>
         <a class="dropdown-item" href="{% url 'lit:ref_upload' assessment.pk %}">Upload full text URLs</a>
-        <a class="dropdown-item" href="{% url 'lit:duplicate-resolution' assessment.pk %}">Duplicate resolution</a>
+        {% if obj_perms.edit %}
+          <a class="dropdown-item" href="{% url 'lit:duplicate-resolution' assessment.pk %}">Duplicate resolution</a>
+        {% else %}
+          <a class="dropdown-item" href="{% url 'lit:resolved-duplicates' assessment.pk %}">Resolved duplicates</a>
+        {% endif %}
         <div class="dropdown-divider"></div>
         <span class="dropdown-header">Exports</span>
         <a class="dropdown-item" href="{% url 'lit:api:assessment-reference-export' assessment.pk %}?format=xlsx">Download all references</a>
diff --git a/hawc/apps/lit/templates/lit/resolved_duplicates.html b/hawc/apps/lit/templates/lit/resolved_duplicates.html
index 2a833bae9e..7bda5535f1 100644
--- a/hawc/apps/lit/templates/lit/resolved_duplicates.html
+++ b/hawc/apps/lit/templates/lit/resolved_duplicates.html
@@ -4,9 +4,11 @@
 {% block content %}
   <div class="d-flex">
     <h2>Resolved duplicates</h2>
-    {% actions %}
-      <a class="dropdown-item" href="{% url 'lit:duplicate-resolution' assessment.pk %}">Duplicate resolution</a>
-    {% endactions %}
+    {% if obj_perms.edit %}
+      {% actions %}
+        <a class="dropdown-item" href="{% url 'lit:duplicate-resolution' assessment.pk %}">Duplicate resolution</a>
+      {% endactions %}
+    {% endif %}
   </div>
   <ul class="list-group list-group-flush my-3">
     {% for object in object_list %}
diff --git a/hawc/apps/lit/views.py b/hawc/apps/lit/views.py
index d18e9a4edc..e607ed933b 100644
--- a/hawc/apps/lit/views.py
+++ b/hawc/apps/lit/views.py
@@ -1258,6 +1258,7 @@ class DuplicateResolution(BaseList):
     model = models.DuplicateCandidateGroup
     template_name = "lit/duplicate_resolution.html"
     breadcrumb_active_name = "Duplicate resolution"
+    assessment_permission = AssessmentViewPermissions.TEAM_MEMBER_EDITABLE
 
     paginate_by = 5
 
@@ -1283,6 +1284,7 @@ class ResolvedDuplicates(BaseList):
     model = models.DuplicateCandidateGroup
     template_name = "lit/resolved_duplicates.html"
     breadcrumb_active_name = "Resolved duplicates"
+    assessment_permission = AssessmentViewPermissions.TEAM_MEMBER
 
     def get_queryset(self):
         return (
@@ -1305,7 +1307,7 @@ class IdentifyDuplicates(MessageMixin, View):
 
     def get(self, request, *args, **kwargs):
         assessment = get_object_or_404(Assessment, pk=kwargs["pk"])
-        if not assessment.user_is_team_member_or_higher(request.user):
+        if not assessment.user_can_edit_object(request.user):
             raise PermissionDenied()
         url = reverse("lit:duplicate-resolution", args=(assessment.pk,))
         models.DuplicateCandidateGroup.create_duplicate_candidate_groups(assessment.pk)