Skip to content

Commit

Permalink
Merge branch 'pgvector_bigdata_insert_and_search' of github.com:uogbuji/OgbujiPT into insert_many_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
choccccy committed Nov 24, 2023
2 parents a600630 + 10bf165 commit c497918
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions pylib/embedding/pgvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# import warnings
# import itertools
import json
from typing import Sequence
from typing import Iterable
from uuid import UUID
from datetime import datetime, timezone

Expand Down Expand Up @@ -76,7 +76,7 @@

PAGE_NUMBERS_WHERE_CLAUSE = 'page_numbers && {query_page_numbers} -- Overlap operator\n'

TAGS_WHERE_CLAUSE = 'tags && {query_tags} -- Overlap operator\n'
# WHERE-clause fragment filtering rows by tag. `@>` is PostgreSQL's array
# "contains" operator: the row's `tags` must include every element of the
# queried array (unlike `&&`, which matches on any overlap).
# NOTE(review): presumably {query_tags} is formatted as a bracketed list
# literal so that `ARRAY{query_tags}` is valid SQL — confirm at call site.
TAGS_WHERE_CLAUSE = 'tags @> ARRAY{query_tags} -- Contains operator\n'
# ----------------------------------------------------------------------------------------------------------------------
# Generic SQL template for creating a table to hold individual messages from a chatlog and their metadata
CREATE_CHATLOG_TABLE = '''-- Create a table to hold individual messages from a chatlog and their metadata
Expand Down Expand Up @@ -328,7 +328,7 @@ async def insert(

async def insert_many(
self,
content_list: Sequence[tuple[str, str | None, list[int], list[str]]]
content_list: Iterable[tuple[str, str | None, str | None, list[int], list[str]]]
) -> None:
'''
Update a table with one or more embedded documents
Expand All @@ -340,12 +340,10 @@ async def insert_many(
'''
await self.conn.executemany(
INSERT_DOCS.format(table_name=self.table_name),
[
(
self._embedding_model.encode(content).tolist(), content, title, page_numbers, tags
)
(
(self._embedding_model.encode(content), content, title, page_numbers, tags)
for content, title, page_numbers, tags in content_list
]
)
)

async def search(
Expand Down

0 comments on commit c497918

Please sign in to comment.