diff --git a/images/image-18.png b/images/image-18.png new file mode 100644 index 0000000..3be8730 Binary files /dev/null and b/images/image-18.png differ diff --git a/images/image-19.png b/images/image-19.png new file mode 100644 index 0000000..3900843 Binary files /dev/null and b/images/image-19.png differ diff --git a/images/image-20.png b/images/image-20.png new file mode 100644 index 0000000..3febe35 Binary files /dev/null and b/images/image-20.png differ diff --git a/images/image-21.png b/images/image-21.png new file mode 100644 index 0000000..9a11c03 Binary files /dev/null and b/images/image-21.png differ diff --git a/images/image-22.png b/images/image-22.png new file mode 100644 index 0000000..bd7880c Binary files /dev/null and b/images/image-22.png differ diff --git a/images/image-23.png b/images/image-23.png new file mode 100644 index 0000000..9ad702f Binary files /dev/null and b/images/image-23.png differ diff --git a/images/image-24.png b/images/image-24.png new file mode 100644 index 0000000..48daf89 Binary files /dev/null and b/images/image-24.png differ diff --git a/images/image-25.png b/images/image-25.png new file mode 100644 index 0000000..836e64c Binary files /dev/null and b/images/image-25.png differ diff --git a/images/image-26.png b/images/image-26.png new file mode 100644 index 0000000..6512c8a Binary files /dev/null and b/images/image-26.png differ diff --git a/images/image-27.png b/images/image-27.png new file mode 100644 index 0000000..f6da37a Binary files /dev/null and b/images/image-27.png differ diff --git a/images/image-28.png b/images/image-28.png new file mode 100644 index 0000000..05750fd Binary files /dev/null and b/images/image-28.png differ diff --git a/images/image-29.png b/images/image-29.png new file mode 100644 index 0000000..e44dc78 Binary files /dev/null and b/images/image-29.png differ diff --git a/images/image-30.png b/images/image-30.png new file mode 100644 index 0000000..297c2c5 Binary files /dev/null and 
b/images/image-30.png differ diff --git a/images/image-31.png b/images/image-31.png new file mode 100644 index 0000000..4c852fa Binary files /dev/null and b/images/image-31.png differ diff --git a/images/image-32.png b/images/image-32.png new file mode 100644 index 0000000..d9fbfe9 Binary files /dev/null and b/images/image-32.png differ diff --git a/images/image-33.png b/images/image-33.png new file mode 100644 index 0000000..4293444 Binary files /dev/null and b/images/image-33.png differ diff --git a/script/chatting.py b/script/chatting.py new file mode 100644 index 0000000..4f72d9a --- /dev/null +++ b/script/chatting.py @@ -0,0 +1,48 @@ +import gc +import argparse + +from send_request import send_request + +def chatting(args): + """ + main entry point + """ + + data = { + "api_key": str(args.api_key), + "user": str(args.user), + "data_id": str(args.data_id), + "question": str(args.question) + } + + result = send_request(args.api_url, data) + + gc.collect() + + print(result) + +if __name__ == "__main__": + """ + Form command lines + """ + # Clean up buffer memory + gc.collect() + + user = "user@gmail.com" + api_key = "AMEYbpdcmrUxNu_Fb80qutukUZdlsmYiH4g7As5LzNA" + data_id = "" + api_url = "http://localhost:8000/conversation" + question = "hi" + + # Add options + p = argparse.ArgumentParser() + p = argparse.ArgumentParser(description="Translate text within an image.") + p.add_argument("--api_url", type=str, default=api_url, help="URL to send the POST request to") + p.add_argument("--data_id", type=str, default=data_id, help="payload directory to the test example") + p.add_argument("--user", type=str, default=user, help="user") + p.add_argument("--question", type=str, default=question, help="user's question") + p.add_argument("--api_key", type=str, default=api_key, help="api key") + args = p.parse_args() + + + chatting(args) diff --git a/script/check_api_key.py b/script/check_api_key.py new file mode 100644 index 0000000..94ddcce --- /dev/null +++ 
b/script/check_api_key.py @@ -0,0 +1,44 @@ +import gc +import argparse + +from send_request import send_request + +def check_api_key(args): + """ + main entry point + """ + + data = { + "api_key": str(args.api_key), + "user": str(args.user), + "data_id": str(args.data_id) + } + + result = send_request(args.api_url, data) + + gc.collect() + + print(result) + +if __name__ == "__main__": + """ + Form command lines + """ + # Clean up buffer memory + gc.collect() + + user = "user@gmail.com" + api_key = "AMEYbpdcmrUxNu_Fb80qutukUZdlsmYiH4g7As5LzNA" + data_id = "" + api_url = "http://localhost:8000/check_api" + + # Add options + p = argparse.ArgumentParser() + p = argparse.ArgumentParser(description="Translate text within an image.") + p.add_argument("--api_url", type=str, default=api_url, help="URL to send the POST request to") + p.add_argument("--data_id", type=str, default=data_id, help="payload directory to the test example") + p.add_argument("--user", type=str, default=user, help="user") + p.add_argument("--api_key", type=str, default=api_key, help="api key") + args = p.parse_args() + + check_api_key(args) \ No newline at end of file diff --git a/script/create_api_key.py b/script/create_api_key.py new file mode 100644 index 0000000..90385ae --- /dev/null +++ b/script/create_api_key.py @@ -0,0 +1,47 @@ +import gc +import argparse + +from send_request import send_request + +def create_api_key(args): + """ + main entry point + """ + + data = { + "user": str(args.user), + "title": str(args.title), + "description": str(args.description), + "data_id": str(args.data_id) + } + + result = send_request(args.api_url, data) + + gc.collect() + + print(result) + +if __name__ == "__main__": + """ + Form command lines + """ + # Clean up buffer memory + gc.collect() + + user = "user@gmail.com" + title = "title" + description = "description" + data_id = "" + api_url = "http://localhost:8000/create_api" + + # Add options + p = argparse.ArgumentParser() + p = 
argparse.ArgumentParser(description="Translate text within an image.") + p.add_argument("--api_url", type=str, default=api_url, help="URL to send the POST request to") + p.add_argument("--data_id", type=str, default=data_id, help="payload directory to the test example") + p.add_argument("--user", type=str, default=user, help="user") + p.add_argument("--title", type=str, default=title, help="title") + p.add_argument("--description", type=str, default=description, help="title") + args = p.parse_args() + + create_api_key(args) \ No newline at end of file diff --git a/script/delete_api_key.py b/script/delete_api_key.py new file mode 100644 index 0000000..a9d9324 --- /dev/null +++ b/script/delete_api_key.py @@ -0,0 +1,45 @@ +import gc +import argparse + +from send_request import send_request + +def delete_api_key(args): + """ + main entry point + """ + + data = { + "api_key": str(args.api_key), + "user": str(args.user), + "data_id": str(args.data_id) + } + + result = send_request(args.api_url, data) + + gc.collect() + + print(result) + +if __name__ == "__main__": + """ + Form command lines + """ + # Clean up buffer memory + gc.collect() + + user = "user@gmail.com" + api_key = "AMEYbpdcmrUxNu_Fb80qutukUZdlsmYiH4g7As5LzNA" + data_id = "" + api_url = "http://localhost:8000/delete_api" + + # Add options + p = argparse.ArgumentParser() + p = argparse.ArgumentParser(description="Translate text within an image.") + p.add_argument("--api_url", type=str, default=api_url, help="URL to send the POST request to") + p.add_argument("--data_id", type=str, default=data_id, help="payload directory to the test example") + p.add_argument("--user", type=str, default=user, help="user") + p.add_argument("--api_key", type=str, default=api_key, help="api key") + args = p.parse_args() + + + delete_api_key(args) \ No newline at end of file diff --git a/script/finetuning.py b/script/finetuning.py new file mode 100644 index 0000000..942a01c --- /dev/null +++ b/script/finetuning.py @@ -0,0 +1,44 @@ 
+import gc +import argparse + +from send_request import send_request + +def run_fine_tuning(args): + """ + main entry point + """ + + data = { + "api_key": str(args.api_key), + "user": str(args.user), + "data_id": str(args.data_id) + } + + result = send_request(args.api_url, data) + + gc.collect() + + print(result) + +if __name__ == "__main__": + """ + Form command lines + """ + # Clean up buffer memory + gc.collect() + + user = "user@gmail.com" + api_key = "AMEYbpdcmrUxNu_Fb80qutukUZdlsmYiH4g7As5LzNA" + data_id = "" + api_url = "http://localhost:8000/finetuning" + + # Add options + p = argparse.ArgumentParser() + p = argparse.ArgumentParser(description="Translate text within an image.") + p.add_argument("--api_url", type=str, default=api_url, help="URL to send the POST request to") + p.add_argument("--data_id", type=str, default=data_id, help="payload directory to the test example") + p.add_argument("--user", type=str, default=user, help="user") + p.add_argument("--api_key", type=str, default=api_key, help="api key") + args = p.parse_args() + + run_fine_tuning(args) diff --git a/script/send_request.py b/script/send_request.py new file mode 100644 index 0000000..68bf280 --- /dev/null +++ b/script/send_request.py @@ -0,0 +1,23 @@ +import requests +import json +import os + +def send_request(api_url, data): + """Sends a POST request to the specified URL with the provided data.""" + headers = { + 'accept': 'application/json', + 'Content-Type': 'application/json' + } + + try: + # Convert WindowsPath to string in the data dictionary + data = {key: str(val) if isinstance(val, os.PathLike) else val for key, val in data.items()} + + response = requests.post(api_url, headers=headers, data=json.dumps(data)) + if response.status_code == 200: + return response.json() + else: + raise Exception(f"Request failed with status code: {response.status_code}") + except Exception as e: + raise Exception(f"An error occurred while sending the request: {e}") + diff --git a/src/Dockerfile 
b/src/Dockerfile index c02b506..1de1d34 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -3,6 +3,7 @@ FROM python:3.11.0 as base # Define a build argument named PROJECT with a default value of 'api'. ARG PROJECT=src +ARG TEST=test RUN apt-get update && apt-get install -y \ poppler-utils @@ -27,6 +28,7 @@ RUN pip install --upgrade llama-index # Copy the project files into the container. COPY --chown=user:user ./$PROJECT /home/user/$PROJECT +COPY --chown=user:user ./$TEST /home/user/$TEST # Copy the .env file into the container. COPY --chown=user:user .env /home/user/.env @@ -48,6 +50,8 @@ RUN pip install --upgrade llama-index USER root # Copy the project files into the container. COPY --chown=user:user ./$PROJECT /home/user/$PROJECT +COPY --chown=user:user ./$TEST /home/user/$TEST + # Create a directory for mypy cache and change its ownership to the non-root user. RUN mkdir /home/user/.mypy_cache && chown user:user -R /home/user/.mypy_cache # Install pytest within the test stage diff --git a/src/main.py b/src/main.py index df0554d..82751be 100644 --- a/src/main.py +++ b/src/main.py @@ -22,8 +22,8 @@ from utils.total_process import total_process from utils.create_api import create_api_key -from utils.delete_api_key import delete_api_key -from utils.check_api_key import check_api_key +from src.utils.delete_api import delete_api_key +from src.utils.check_api import check_api_key from utils.chatting import chatting # Create a FastAPI application @@ -88,7 +88,9 @@ async def finetuning(request_body: MainModel): 'api_key' : api_key } - total_process(args) + result = total_process(args) + + return result @app.post("/create_api") @@ -107,7 +109,9 @@ async def create_api(request_body: CreateAPIModel): 'description' : request_body.description } - create_api_key(args) + result = create_api_key(args) + + return result @app.post("/delete_api") @@ -130,7 +134,9 @@ async def delete_api(request_body: MainModel): 'api_key' : api_key } - delete_api_key(args) + result = 
delete_api_key(args) + + return result @app.post("/check_api") @@ -153,7 +159,9 @@ async def check_api(request_body: MainModel): 'api_key' : api_key } - check_api_key(args) + result = check_api_key(args) + + return result @app.post("/conversation") @@ -177,4 +185,6 @@ async def conversation(request_body: ChattingModel): 'question' : request_body.question } - chatting(args) \ No newline at end of file + result = chatting(args) + + return result \ No newline at end of file diff --git a/src/mongodb/MongoDBClass.py b/src/mongodb/MongoDBClass.py index ccbcf50..1d1dda3 100644 --- a/src/mongodb/MongoDBClass.py +++ b/src/mongodb/MongoDBClass.py @@ -126,12 +126,12 @@ def create_api(self, data:APIModel): # Check if the insertion was successful if result.inserted_id: print("New item has been added to the collection with ID:", result.inserted_id) - return True + return {"status": "success", "api_key": data['api'], "message": f"New item has been added to the collection with ID: {result.inserted_id}"} else: print("Failed to add a new item to the collection") - return False + return {"status": "failed", "message": "Failed to add a new item to the collection"} else: - return db + return {"status": "failed", "message": "Failed to add a new item to the collection"} def delete_api(self, api_key, user): # Connect to MongoDB @@ -149,10 +149,12 @@ def delete_api(self, api_key, user): result = collection.update_one(filter_condition, update_operation) if result.modified_count == 1: print("Document updated successfully") + return {"status": "success", "message": "Successfully deleted"} else: print("No matching document found") + return {"status": "failed", "message": "No matching document found"} else: - return db + return {"status": "failed", "message": "No matching document found"} def check_validation_api(self, api_key, user): # Connect to MongoDB @@ -168,9 +170,9 @@ def check_validation_api(self, api_key, user): existing_document = collection.find_one(filter_condition) if 
existing_document: print("Document exists in the collection") - return True + return {"status": "success", "message": "Document exists in the collection"} else: print("Document does not exist in the collection") - return False + return {"status": "failed", "message": "Document does not exist in the collection"} else: - return db \ No newline at end of file + return {"status": "failed", "message": "Document does not exist in the collection"} \ No newline at end of file diff --git a/src/utils/chatting.py b/src/utils/chatting.py index de1827c..8122a79 100644 --- a/src/utils/chatting.py +++ b/src/utils/chatting.py @@ -35,7 +35,11 @@ def chatting(args): response = chatting.ask_question(args['question']) print(response) + + return {"status": "success", "response": response.response} else: print("invalide api key") + return {"status": "success", "fine_tuned_model": "invalide api key"} + gc.collect() \ No newline at end of file diff --git a/src/utils/check_api.py b/src/utils/check_api.py new file mode 100644 index 0000000..45549d8 --- /dev/null +++ b/src/utils/check_api.py @@ -0,0 +1,29 @@ +import gc +from pathlib import Path + +from src.utils.read_json import read_json +from src.mongodb.MongoDBClass import MongoDBClass + + +def check_api_key(args): + """ + main entry point + """ + + # Payload + payload_data = read_json(args['payload_dir']) + + # Construct the MongoDB Atlas URI + mongo_uri = payload_data["mongo_uri"] + + # Call class instance + mongodb = MongoDBClass( + db_name=payload_data["db_name"], + collection_name=payload_data["collection_name"], + mongo_uri=mongo_uri) + + result = mongodb.check_validation_api(api_key=str(Path(args['api_key'])), user=str(Path(args['user']))) + + gc.collect() + + return result \ No newline at end of file diff --git a/src/utils/create_api.py b/src/utils/create_api.py index e49e77f..08f6c8b 100644 --- a/src/utils/create_api.py +++ b/src/utils/create_api.py @@ -36,6 +36,8 @@ def create_api_key(args): "updated_at": datetime.now(), } - 
mongodb.create_api(data) + result = mongodb.create_api(data) gc.collect() + + return result diff --git a/src/utils/delete_api.py b/src/utils/delete_api.py new file mode 100644 index 0000000..bd38815 --- /dev/null +++ b/src/utils/delete_api.py @@ -0,0 +1,28 @@ +import gc +from pathlib import Path + +from src.utils.read_json import read_json +from src.mongodb.MongoDBClass import MongoDBClass + +def delete_api_key(args): + """ + main entry point + """ + + # Payload + payload_data = read_json(args['payload_dir']) + + # Construct the MongoDB Atlas URI + mongo_uri = payload_data["mongo_uri"] + + # Call class instance + mongodb = MongoDBClass( + db_name=payload_data["db_name"], + collection_name=payload_data["collection_name"], + mongo_uri=mongo_uri) + + result = mongodb.delete_api(api_key=str(Path(args['api_key'])), user=str(Path(args['user']))) + + gc.collect() + + return result \ No newline at end of file diff --git a/src/utils/total_process.py b/src/utils/total_process.py index 478ff2c..32113d6 100644 --- a/src/utils/total_process.py +++ b/src/utils/total_process.py @@ -33,7 +33,7 @@ def total_process(args): is_available = mongodb.check_validation_api(api_key=str(Path(args['api_key'])), user=str(Path(args['user']))) - if is_available: + if is_available['status'] == "success": print("valid api key") # Separate the data separate_data(payload_data["data_path"], payload_data["threasold_image_percent_of_pdf"]) @@ -136,16 +136,19 @@ def total_process(args): fine_tune.jsonl_generation() # Fine tuning - fine_tune.finetune() + fine_tuned_model = fine_tune.finetune() # Write into log file end_time = time.time() msg = f"Total processing time: {end_time - start_time} seconds" print(msg) + + return {"status": "success", "fine_tuned_model": fine_tuned_model} else: print("invalide api key") - gc.collect() + return {"status": "success", "message": "invalide api key"} + def save_to_txt(payload_data, result: str): current_time = datetime.now().strftime('%y_%m_%d_%H_%M_%S') diff --git 
a/test/regression/regression_test013/train_data/24_01_12_04_34_50_data.txt b/test/regression/regression_test013/train_data/24_01_12_04_34_50_data.txt new file mode 100644 index 0000000..7da707b --- /dev/null +++ b/test/regression/regression_test013/train_data/24_01_12_04_34_50_data.txt @@ -0,0 +1,8 @@ +The images are of handwritten mathematical notes demonstrating a process for finding the vector, symmetric, and parametric equations of a line in three-dimensional space. + +The first half of the document focuses on a line that passes through the point (2, 1, 3), and is parallel to the vector \(3\mathbf{i} - 2\mathbf{j} + 5\mathbf{k}\). The vector equation is expressed as \( \mathbf{r} = \mathbf{r_0} + t\mathbf{v} \), where \( \mathbf{r} \) is the position vector of a generic point on the line, and \( t \) is a scalar parameter. The conversion of the vector equation to parametric equations by equating the components of the vectors is then shown, specifically: \( x = x_0 + at \), \( y = y_0 + bt \), and \( z = z_0 + ct \), after which it gets specific by substituting a set of point coordinates and vector components into the formula. The symmetric equation is also derived. + +The second half repeats the process with a line through the points A(1, 3, -2) and B(4, 1, 5). Here the vector \( \mathbf{v} = \overrightarrow{AB} \) is calculated and then used to establish vector, parametric, and symmetric equations for this line. + +The equations are complemented by hand-drawn visuals like a coordinate system to illustrate the concept. For clarity, important parts of the equations are highlighted in yellow, while key points are boxed or underlined. It's worth noting that the notes are labeled "Scanned with CamScanner," indicating they were digitized using an app. 
+ diff --git a/test/regression/regression_test013/train_data/24_01_12_04_35_22_data.txt b/test/regression/regression_test013/train_data/24_01_12_04_35_22_data.txt new file mode 100644 index 0000000..67111f7 --- /dev/null +++ b/test/regression/regression_test013/train_data/24_01_12_04_35_22_data.txt @@ -0,0 +1,12 @@ +The image provided appears to be a page from an educational document or textbook explaining the concepts of accuracy and precision, particularly in the context of scientific measurement. It features text and three dartboard illustrations, each visually representing a different scenario to further clarify accuracy and precision. + +1. The first dartboard on the left is labeled "Inaccurate and Imprecise." It shows darts scattered mainly to one side of the center, indicating that neither accuracy nor precision is achieved since the target of the board (bullseye) is not hit and the darts are not consistently hitting the same spot. + +2. The second dartboard in the middle, labeled "Inaccurate but Precise," displays darts closely grouped together but off-center. This represents a state of precision where the darts are consistently hitting the same incorrect area (repeatability) but not hitting the intended target, reflecting the absence of accuracy. + +3. The third dartboard on the right, labeled "Accurate and Precise," shows a cluster of darts in the center of the board. This demonstrates both accuracy as the darts hit the target and precision as they are consistently grouped in the same area. + +The images are used as metaphors to grasp that precision pertains to the repeatability or consistency of results, while accuracy indicates how close the results are to the true value. The text around these images further elaborates these concepts and their difference. The importance of calibrating equipment to attain accurate and precise measurements is also highlighted in the text. 
+ +Significantly, the text introduces the concept of 'significant figures' in scientific measurement and explains their definitions, stressing that understanding these concepts contributes to proper scientific inquiry, and emphasizing the importance of being accurate (hitting the target) and precise (consistency between results). +