Skip to content

Commit

Permalink
Call executemany with a generator instead of a list.
Browse files Browse the repository at this point in the history
asyncpg's Connection.executemany() accepts any iterable of argument tuples, including a generator, so passing it a list needlessly materializes every record in memory at once; that overhead grows with the number and size of the records being inserted.
  • Loading branch information
chimezie committed Nov 24, 2023
1 parent c0ea0ad commit c864463
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions pylib/embedding/pgvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# import warnings
# import itertools
import json
from typing import Sequence
from typing import Sequence, Iterable
from uuid import UUID
from datetime import datetime, timezone

Expand Down Expand Up @@ -320,7 +320,7 @@ async def insert(

async def insert_many(
self,
content_list: Sequence[tuple[str, str | None, str | None, list[int], list[str]]]
content_list: Iterable[tuple[str, str | None, str | None, list[int], list[str]]]
) -> None:
'''
Update a table with one or more embedded documents
Expand All @@ -332,10 +332,10 @@ async def insert_many(
'''
await self.conn.executemany(
INSERT_DOCS.format(table_name=self.table_name),
[
(
(self._embedding_model.encode(content), content, title, page_numbers, tags)
for content, title, page_numbers, tags in content_list
]
)
)

async def search(
Expand Down

0 comments on commit c864463

Please sign in to comment.