|
8 | 8 | import numpy as np |
9 | 9 | from sklearn.metrics.pairwise import cosine_similarity |
10 | 10 | import traceback |
| 11 | +from atomicwrites import atomic_write |
11 | 12 |
|
12 | 13 | openai.api_base = os.getenv("OPENAI_BASE_URL") |
13 | 14 | openai.api_key = os.getenv("OPENAI_API_KEY") |
@@ -36,15 +37,29 @@ def __init__(self, knowledge_base_path="./data/knowledge_base.json"): |
36 | 37 | logging.info("Knowledge base embeddings created") |
37 | 38 | self.conversation_history = [] |
38 | 39 |
|
def _atomic_save_numpy(self, file_path, data):
    """Serialize ``data`` with ``np.save`` through an ``atomic_write`` handle.

    Args:
        file_path: Destination path handed to ``atomic_write``.
        data: Object passed unchanged to ``np.save``.

    The handle is opened in binary mode with ``overwrite=True``, so an
    existing file at ``file_path`` may be replaced.
    """
    # NOTE(review): presumably atomicwrites stages the bytes in a temporary
    # file and moves it over file_path on clean exit -- confirm in its docs.
    writer = atomic_write(file_path, mode="wb", overwrite=True)
    with writer as tmp_handle:
        np.save(tmp_handle, data)
| 43 | + |
def rebuild_embeddings(self):
    """Recompute both knowledge-base embedding sets and cache them on disk.

    The new embeddings are computed and converted to NumPy arrays before
    any file is written, so a failure while embedding or converting leaves
    both cache files and the in-memory embeddings untouched. The in-memory
    attributes are replaced only after both saves have returned.

    Raises:
        Any exception raised by the embedding, conversion, or save steps;
        nothing is caught here.
    """
    logging.info("Rebuilding document embeddings...")

    new_doc_embeddings = self.embed_knowledge_base()
    new_about_embeddings = self.embed_knowledge_base_about()

    # Finish every fallible in-memory step before touching the disk, so a
    # conversion error cannot leave one cache file new and the other stale.
    # NOTE(review): assumes both results expose .cpu().numpy() (e.g. torch
    # tensors) -- confirm against the embed_* methods.
    doc_array = new_doc_embeddings.cpu().numpy()
    about_array = new_about_embeddings.cpu().numpy()

    # Saves run in a fixed order: document embeddings first, then "about".
    # NOTE: the two files are not replaced as a single unit; if the second
    # save fails, the first file has already been updated.
    self._atomic_save_numpy("./data/doc_embeddings.npy", doc_array)
    self._atomic_save_numpy("./data/doc_about_embeddings.npy", about_array)

    # Update in-memory embeddings only after successful saves.
    self.doc_embeddings = new_doc_embeddings
    self.doc_about_embeddings = new_about_embeddings

    logging.info("Embeddings rebuilt successfully.")
48 | 63 |
|
49 | 64 | def load_knowledge_base(self): |
50 | 65 | with open(self.knowledge_base_path, "r") as kb_file: |
|
0 commit comments