Skip to content

Commit

Permalink
fix chroma embeddings when an already seen file is uploaded
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Nov 18, 2023
1 parent c5c0bc4 commit 320f843
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions document_qa/document_qa_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,16 @@ def create_memory_embeddings(self, pdf_path, doc_id=None, chunk_size=500, perc_o
if doc_id:
hash = doc_id
else:

hash = metadata[0]['hash']

if hash not in self.embeddings_dict.keys():
self.embeddings_dict[hash] = Chroma.from_texts(texts, embedding=self.embedding_function, metadatas=metadata, collection_name=hash)
self.embeddings_dict[hash] = Chroma.from_texts(texts, embedding=self.embedding_function, metadatas=metadata,
collection_name=hash)
else:
self.embeddings_dict[hash].delete(ids=self.embeddings_dict[hash].get()['ids'])
self.embeddings_dict[hash] = Chroma.from_texts(texts, embedding=self.embedding_function, metadatas=metadata,
collection_name=hash)


self.embeddings_root_path = None

Expand Down

0 comments on commit 320f843

Please sign in to comment.