Skip to content

Commit

Permalink
Merge pull request #78 from SADiLaR/feature/search-vector
Browse files Browse the repository at this point in the history
added vector search
  • Loading branch information
daniel-gray-tangent authored Jun 18, 2024
2 parents 833ca2b + 924eda3 commit 1e5c910
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 9 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ dev-quick-install:
# Run the dev_pdf_mass_upload management command in a one-off "web" container
# (--rm removes the container once the command exits).
dev-mass-pdf-upload:
	@docker compose run --rm web python manage.py dev_pdf_mass_upload

# Run the dev_update_vector_search management command in a one-off "web"
# container (--rm removes the container once the command exits).
# NOTE(review): this target is spelled with underscores while the neighbouring
# targets (dev-mass-pdf-upload, docker-shell) use hyphens -- consider aligning.
dev_update_vector_search:
	@docker compose run --rm web python manage.py dev_update_vector_search

# Open an interactive bash shell inside the sadilar-terminology-web container.
docker-shell:
	docker exec -it sadilar-terminology-web bash

Expand Down
11 changes: 3 additions & 8 deletions app/app/views.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import os

from django.contrib.postgres.search import (
SearchHeadline,
SearchQuery,
SearchRank,
SearchVector,
)
from django.contrib.postgres.search import SearchHeadline, SearchQuery, SearchRank
from django.core.paginator import Paginator
from django.db.models import Count
from django.http import HttpResponse
Expand Down Expand Up @@ -67,13 +62,13 @@ def search(request):
q = request.GET.get("q")

if q:
vector = SearchVector("title", "document_data")
queue = SearchQuery(q)
search_headline = SearchHeadline("document_data", queue)

documents = (
DocumentFile.objects.annotate(rank=SearchRank(vector, queue))
DocumentFile.objects.annotate(rank=SearchRank("search_vector", queue))
.annotate(search_headline=search_headline)
.filter(search_vector=queue)
.order_by("-rank")
)

Expand Down
1 change: 1 addition & 0 deletions app/general/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

self.fields["document_data"].widget = HiddenInput()
self.fields["search_vector"].widget = HiddenInput()

# If the instance has a mime_type, the field should be disabled
if not self.instance.mime_type:
Expand Down
20 changes: 20 additions & 0 deletions app/general/management/commands/dev_update_vector_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os

from django.core.management.base import BaseCommand

from general.models import DocumentFile


class Command(BaseCommand):
    """Management command that re-saves every DocumentFile row.

    Re-saving is what refreshes the stored ``search_vector`` (per the original
    author's note; presumably via the database trigger -- confirm).
    """

    help = "Updating the Vector Search index on document_file."

    def handle(self, *args, **options):
        """Re-save each DocumentFile and report progress on the command's stdout.

        Args:
            *args: Positional arguments supplied by Django; unused.
            **options: Parsed command-line options; unused.
        """
        # Clearing the screen only makes sense on an interactive terminal;
        # skipping it elsewhere keeps piped/captured output free of noise.
        if self.stdout.isatty():
            os.system("clear")

        # Write through self.stdout (not print) so output can be captured and
        # redirected by call_command() and by tests.
        self.stdout.write("Querying the Vector Search index and Updating.")

        # iterator() streams rows in chunks instead of caching the whole
        # queryset, which matters because each row carries document_data.
        for document_file in DocumentFile.objects.all().iterator():
            document_file.save()  # This line updates the vector search for the document file
            self.stdout.write(f"Updated {document_file.title}.")
            self.stdout.write("")  # blank separator line between documents
35 changes: 35 additions & 0 deletions app/general/migrations/00010_documentfile_search_vector_trigger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from django.contrib.postgres.search import SearchVector
from django.db import migrations


def compute_search_vector(apps, schema_editor):
    """Fill ``search_vector`` on every existing DocumentFile row.

    Looks the model up through ``apps`` and issues a single bulk ``update()``
    that builds the vector from ``document_data`` and ``title``.
    ``schema_editor`` is accepted to satisfy the RunPython signature and is
    not used.
    """
    document_file_model = apps.get_model("general", "DocumentFile")
    combined_vector = SearchVector("document_data", "title")
    document_file_model.objects.update(search_vector=combined_vector)


class Migration(migrations.Migration):
    """Install a trigger that maintains ``search_vector`` and backfill existing rows.

    The forward SQL creates ``search_vector_trigger`` on
    ``general_documentfile``; the reverse SQL drops it. A RunPython step then
    recomputes ``search_vector`` for all rows (no-op on reverse).
    """

    dependencies = [
        ("general", "0009_documentfile_search_vector_and_more"),
    ]

    operations = [
        # The trigger runs before every INSERT, and before any UPDATE that
        # touches document_data, title or search_vector, and calls PostgreSQL's
        # tsvector_update_trigger to rebuild search_vector from document_data
        # and title using the 'pg_catalog.english' configuration.
        # NOTE(review): because search_vector is in the UPDATE OF list, the
        # trailing "SET search_vector = NULL" statement appears to fire the
        # trigger for every existing row, so rows likely end up with
        # trigger-computed vectors rather than NULL -- confirm.
        migrations.RunSQL(
            sql="""
            CREATE TRIGGER search_vector_trigger
            BEFORE INSERT OR UPDATE OF document_data, title, search_vector
            ON general_documentfile
            FOR EACH ROW EXECUTE PROCEDURE
            tsvector_update_trigger(
            search_vector, 'pg_catalog.english', document_data, title
            );
            UPDATE general_documentfile SET search_vector = NULL;
            """,
            reverse_sql="""
            DROP TRIGGER IF EXISTS search_vector_trigger
            ON general_documentfile;
            """,
        ),
        # Backfill through the ORM; nothing to undo on reverse.
        # NOTE(review): this update also writes search_vector, so the trigger
        # above presumably overrides the value computed here (and the
        # SearchVector call passes no explicit config, unlike the trigger's
        # 'pg_catalog.english') -- confirm whether this step is still needed.
        migrations.RunPython(
            compute_search_vector, reverse_code=migrations.RunPython.noop
        ),
    ]
24 changes: 24 additions & 0 deletions app/general/migrations/0009_documentfile_search_vector_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 5.0.2 on 2024-06-14 10:33

import django.contrib.postgres.indexes
import django.contrib.postgres.search
from django.db import migrations


class Migration(migrations.Migration):
    """Add the ``search_vector`` field to DocumentFile and a GIN index on it."""

    dependencies = [("general", "0008_documentfile_description_and_more")]

    operations = [
        # New nullable column that stores the document's search vector.
        migrations.AddField(
            model_name="documentfile",
            name="search_vector",
            field=django.contrib.postgres.search.SearchVectorField(
                blank=True,
                null=True,
            ),
        ),
        # GIN index over the new column.
        migrations.AddIndex(
            model_name="documentfile",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["search_vector"],
                name="general_doc_search__752b22_gin",
            ),
        ),
    ]
10 changes: 9 additions & 1 deletion app/general/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVectorField
from django.core.validators import FileExtensionValidator
from django.db import models
from django.utils.translation import gettext_lazy as _
Expand Down Expand Up @@ -140,12 +142,18 @@ class DocumentFile(models.Model):
subjects = models.ManyToManyField("Subject", blank=True, verbose_name=_("subjects"))
languages = models.ManyToManyField("Language", blank=True, verbose_name=_("languages"))

search_vector = SearchVectorField(null=True, blank=True)

# added simple historical records to the model
history = HistoricalRecords(excluded_fields=["document_data"])
history = HistoricalRecords(excluded_fields=["document_data", "search_vector"])

class Meta:
verbose_name = _("Document File")
verbose_name_plural = _("Document Files")

indexes = [
GinIndex(fields=["search_vector"]),
]

def __str__(self):
return self.title

0 comments on commit 1e5c910

Please sign in to comment.