Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Db interface #95

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions backend/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ def __init__(self, path='.env', gpt_model="gpt-3.5-turbo"):
load_dotenv(dotenv_path=path)
self.API_KEY = os.getenv('OPENAI_API_KEY')
self.MONGODB_URI = os.getenv('MONGODB_URI')
self.MONGODB_TEST_URI = os.getenv('MONGODB_TEST_URI')


49 changes: 45 additions & 4 deletions backend/flashcards/knowledge_base/db_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,26 @@ def post_curriculum(

Args:
curriculum (str): The curriculum to be posted
embedding (list[float]): The embedding of the question
embedding (list[float]): The embedding of the page

Returns:
bool: True if the curriculum was posted, False otherwise
"""
pass

@abstractmethod
def delete_all_curriculum(self) -> bool:
    """
    Remove every curriculum entry stored in the database.

    Concrete database backends must override this method; the abstract
    declaration itself performs no work.

    Returns:
        bool: True if the curriculum was deleted, False otherwise
    """

class MongoDB(DatabaseInterface):
def __init__(self):
self.client = MongoClient(Config().MONGODB_URI)
def __init__(self, uri:str=Config().MONGODB_URI):
self.client = MongoClient(uri)
self.db = self.client["test-curriculum-database"]
self.collection = self.db["test-curriculum-collection"]
self.similarity_threshold = 0.7
Expand All @@ -96,7 +105,6 @@ def get_curriculum(self, pdf_name: str, embedding: list[float]) -> list[Page]:

# Execute the query
documents = self.collection.aggregate([query])

if not documents:
raise ValueError("No documents found")

Expand Down Expand Up @@ -181,3 +189,36 @@ def post_curriculum(
return True
except:
return False

def delete_all_curriculum(self) -> bool:
    """
    Wipe the curriculum collection.

    Issues a single ``delete_many`` with an empty filter against
    ``self.collection``. Any exception raised by that call is printed
    and reported through the return value rather than propagated.

    Returns:
        bool: True if the delete call completed, False if it raised
    """
    # An empty filter document matches every document in the collection.
    match_everything = {}
    try:
        self.collection.delete_many(match_everything)
    except Exception as err:
        print("Error deleting curriculum:", err)
        return False
    else:
        return True

def delete_pdf_pages(self, pdf_name: str) -> bool:
    """
    Remove every stored entry whose ``pdfName`` field equals ``pdf_name``.

    Issues a single ``delete_many`` against ``self.collection``. Any
    exception raised by that call is printed and reported through the
    return value rather than propagated.

    Args:
        pdf_name (str): The PDF name to match for deletion

    Returns:
        bool: True if the delete call completed, False if it raised
    """
    # Only documents carrying this exact PDF name are selected.
    name_filter = {"pdfName": pdf_name}
    try:
        self.collection.delete_many(name_filter)
    except Exception as err:
        print("Error deleting curriculum:", err)
        return False
    else:
        return True
45 changes: 45 additions & 0 deletions backend/flashcards/knowledge_base/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from django.test import TestCase
from flashcards.knowledge_base.embeddings import cosine_similarity
from flashcards.knowledge_base.embeddings import OpenAIEmbedding
from flashcards.knowledge_base.db_interface import MongoDB
from config import Config


class MongoDBTest(TestCase):
    """Integration tests for the MongoDB curriculum store.

    Each test runs against the database addressed by
    ``Config().MONGODB_TEST_URI``: ``setUp`` posts three single-page
    documents and ``tearDown`` deletes all curriculum again.
    """

    def setUp(self):
        # Connect to the dedicated test database rather than the default URI.
        self.mongo = MongoDB(uri=Config().MONGODB_TEST_URI)

        # Fixture pages, keyed by the PDF name they are stored under.
        self.curriculum = {
            "pdf1": "Antonio López de Santa Anna var en meksikansk politiker og general. Fra slutten av 1820-årene og frem til 1855 dominerte han Mexicos politiske liv, og var president seks ganger. Han var en ytterst fargerik personlighet uten noen politisk filosofi, men meget populær blant folket.",
            "pdf2": "I 1829 gjorde spanske tropper et mislykket forsøk på å gjenerobre Mexico. Santa Annas seier mot invasjonsstyrken i Tampico ga ham anerkjennelse som nasjonalist og militærstrateg, et omdømme han nøt godt av de neste 25 årene. Gjennom karrieren var Santa Anna en typisk caudillo som vekslet mellom politisk og militær makt, i en tid da militærmakt var nøkkelen til politisk kontroll.",
            "pdf3": "I 1833 kom han til makten som føderalist og motstander av den romersk-katolske kirken; i praksis etablerte han en sentralisert stat. Han forble ved presidentmakten til 1836, da han ledet meksikanske tropper inn i Texas for å dempe Texasrevolusjonen. Her ble han tatt til fange av Sam Houston, og ble tvunget til å anerkjenne den nye Republikken Texas.",
        }

        # Store every fixture as page 1 of its PDF, together with its embedding.
        for pdf_name, page_text in self.curriculum.items():
            self.mongo.post_curriculum(
                page_text,
                1,
                pdf_name,
                OpenAIEmbedding().get_embedding(page_text),
            )

    def test_get_curriculum_for_embedding_for_the_same_document_content(self):
        # Querying with the embedding of the stored text itself should
        # return that same page first.
        expected_text = self.curriculum["pdf1"]
        query_embedding = OpenAIEmbedding().get_embedding(expected_text)
        pages = self.mongo.get_curriculum("pdf1", query_embedding)

        best_match = pages[0]
        self.assertEqual(best_match.text, expected_text)
        self.assertEqual(best_match.page_num, 1)
        self.assertEqual(best_match.pdf_name, "pdf1")

    def test_get_curriculum_for_query_similar_to_pdf(self):
        # A query that is only topically related to pdf3 should still
        # rank the pdf3 page first.
        query_embedding = OpenAIEmbedding().get_embedding("Den romersk-katolske kirken var ikke stor i Texas under revolusjonen. Sam Houston var personlig en protestant")
        pages = self.mongo.get_curriculum("pdf3", query_embedding)

        best_match = pages[0]
        self.assertEqual(best_match.text, self.curriculum["pdf3"])
        self.assertEqual(best_match.page_num, 1)
        self.assertEqual(best_match.pdf_name, "pdf3")

    def test_delete_pdf_pages(self):
        # Deleting by PDF name must report success.
        self.assertTrue(self.mongo.delete_pdf_pages("pdf1"))

        # With the pdf1 entries gone, the pdf1 text should now match
        # another document best.
        # NOTE(review): this expects a pdf2 page back from a query that
        # names "pdf1" -- presumably get_curriculum does not restrict
        # results to the given pdf_name; confirm against its query.
        query_embedding = OpenAIEmbedding().get_embedding(self.curriculum["pdf1"])
        pages = self.mongo.get_curriculum("pdf1", query_embedding)

        self.assertEqual(pages[0].pdf_name, "pdf2")

    def tearDown(self):
        # Leave the test collection empty for the next test.
        self.mongo.delete_all_curriculum()
2 changes: 0 additions & 2 deletions backend/flashcards/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

base = "/api/"


# class TextToFlashcardTest(TestCase):
# def setUp(self) -> None:
# self.context = "Revenge of the Sith is set three years after the onset of the Clone Wars as established in Attack of the Clones. The Jedi are spread across the galaxy in a full-scale war against the Separatists. The Jedi Council dispatches Jedi Master Obi-Wan Kenobi on a mission to defeat General Grievous, the head of the Separatist army and Count Dooku's former apprentice, to put an end to the war. Meanwhile, after having visions of his wife Padmé Amidala dying in childbirth, Jedi Knight Anakin Skywalker is tasked by the Council to spy on Palpatine, the Supreme Chancellor of the Galactic Republic and, secretly, a Sith Lord. Palpatine manipulates Anakin into turning to the dark side of the Force and becoming his apprentice, Darth Vader, with wide-ranging consequences for the galaxy."
Expand Down Expand Up @@ -41,7 +40,6 @@
#
# def process_answer_tapi self.assertFalse(None, process_answer(user_input))


class RagAPITest(TestCase):
def setUp(self):
self.client = Client()
Expand Down
Loading