Skip to content

Commit a130251

Browse files
committed
Make caching a two-step process: compute the embeddings first, then write them to disk. This prepares for the next step, in which multiple threads can check whether they have the most recent cached file.
1 parent 0264d06 commit a130251

File tree

2 files changed

+23
-7
lines changed

2 files changed

+23
-7
lines changed

app/rag_system.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99
from sklearn.metrics.pairwise import cosine_similarity
1010
import traceback
11+
from atomicwrites import atomic_write
1112

1213
openai.api_base = os.getenv("OPENAI_BASE_URL")
1314
openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -36,15 +37,29 @@ def __init__(self, knowledge_base_path="./data/knowledge_base.json"):
3637
logging.info("Knowledge base embeddings created")
3738
self.conversation_history = []
3839

40+
def _atomic_save_numpy(self, file_path, data):
41+
with atomic_write(file_path, mode="wb", overwrite=True) as f:
42+
np.save(f, data)
43+
3944
def rebuild_embeddings(self):
40-
logging.info("No existing document embeddings found, creating new embeddings.")
41-
self.doc_embeddings = self.embed_knowledge_base()
42-
self.doc_about_embeddings = self.embed_knowledge_base_about()
43-
# cache doc_embeddings to disk
44-
np.save("./data/doc_embeddings.npy", self.doc_embeddings.cpu().numpy())
45-
np.save(
46-
"./data/doc_about_embeddings.npy", self.doc_about_embeddings.cpu().numpy()
45+
logging.info("Rebuilding document embeddings...")
46+
47+
new_doc_embeddings = self.embed_knowledge_base()
48+
new_about_embeddings = self.embed_knowledge_base_about()
49+
50+
# Atomic saves with guaranteed order
51+
self._atomic_save_numpy(
52+
"./data/doc_embeddings.npy", new_doc_embeddings.cpu().numpy()
4753
)
54+
self._atomic_save_numpy(
55+
"./data/doc_about_embeddings.npy", new_about_embeddings.cpu().numpy()
56+
)
57+
58+
# Update in-memory embeddings only after successful saves
59+
self.doc_embeddings = new_doc_embeddings
60+
self.doc_about_embeddings = new_about_embeddings
61+
62+
logging.info("Embeddings rebuilt successfully.")
4863

4964
def load_knowledge_base(self):
5065
with open(self.knowledge_base_path, "r") as kb_file:

app/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ PyYAML==6.0.2
1313
GitPython==3.1.44
1414
redis==6.2.0
1515
fakeredis==2.30.1
16+
atomicwrites==1.4.1
1617

1718
# linter
1819
ruff>=0.12.5

0 commit comments

Comments
 (0)