diff --git a/README.md b/README.md index 8747228..ac6b3ea 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Sistem implementira dvo-fazni pristop za ustvarjanje kakovostnih QA parov: 2. Namestite odvisnosti: ```bash - pip install fastapi uvicorn python-dotenv minio openai pydantic PyMuPDF pillow requests jinja2 + pip install fastapi uvicorn python-dotenv minio openai pydantic PyMuPDF pillow requests jinja2 filelock ``` 3. Ustvarite `.env` datoteko z naslednjimi spremenljivkami: @@ -78,9 +78,10 @@ Zaženite spletni strežnik: uvicorn app:app --reload --host 0.0.0.0 --port 8000 ``` -Pregledovalec QA parov lahko sedaj odpre brskalnik na http://localhost:8000 in: +Pregledovalec QA parov najprej v brskalniku odpre http://localhost:8000/login, +se prijavi z e-naslovom in nato dostopa do http://localhost:8000, kjer lahko: - Pregleda prikazane pare vprašanj in odgovorov. - Jih označi kot "Ustrezen", "Neustrezen" ali "Preskoči". - Po potrebi uredi in popravi njihovo vsebino. -- Povratne informacije se shranjujejo v app_data/feedback.json. \ No newline at end of file +- Povratne informacije se shranjujejo v app_data/feedback.json. diff --git a/app.py b/app.py index 7b83db4..d2891c7 100644 --- a/app.py +++ b/app.py @@ -6,9 +6,10 @@ # Spletni strežnik, ki skrbi za prikaz spletne strani in sprejemanje/vraćanje # podatkov. -from fastapi import FastAPI, Form +from fastapi import FastAPI, Form, Request from fastapi.responses import HTMLResponse from fastapi.staticfiles import StaticFiles +from starlette.middleware.sessions import SessionMiddleware # Delo z datotečnimi potmi. from pathlib import Path # Shranjevanje in branje podatkov v JSON formatu. @@ -30,9 +31,12 @@ from datetime import datetime # Beleženje dogodkov in napak. import logging +import asyncio +from filelock import FileLock # Ustvarimo instanco FastAPI aplikacije. app = FastAPI() +app.add_middleware(SessionMiddleware, secret_key="change-me") # Nastavimo, da se vsebina mape "static" streže pod URLjem /static. app.mount("/static", StaticFiles(directory="static"), name="static") @@ -68,6 +72,8 @@ }) # Končni seznam parov vprašanj in odgovorov, skupaj z metapodatki. qa_data = flattened_data +qa_lock = asyncio.Lock() +feedback_lock = FileLock(str(feedback_path) + ".lock") # Iz diska naložimo HTML predloge, ki so v obliki samostojnih strani: # Prikaže glavno stran z enim parom vprašanje-odgovor. @@ -76,6 +82,7 @@ no_qa_template = Template(Path("templates/no_qa.html").read_text(encoding="utf-8")) # Izpiše zahvalo uporabniku za sodelovanje. thank_you_template = Template(Path("templates/thank_you.html").read_text(encoding="utf-8")) +login_template = Template(Path("templates/login.html").read_text(encoding="utf-8")) # Dodatne predloge, ki vsebuje samo del strani: # Prikaže sliko strani PDF dokumenta, skupaj s parom vprašanje-odgovor in gumbi @@ -198,6 +205,15 @@ def render_pdf_page(pdf_path: Path, page_number: int, bounding_box: dict) -> str return str(img_path) +@app.get("/login", response_class=HTMLResponse) +async def login_page(): + return HTMLResponse(login_template.render()) + +@app.post("/login", response_class=HTMLResponse) +async def login(request: Request, email: str = Form(...)): + request.session["email"] = email + return HTMLResponse('') + # Definicija glavne, t.i. home HTTP poti (ang. route): # - Če qa_data ne vsebuje nobenih parov vprašanj in odgovorov, prikažemo stran # 'no_qa'. @@ -206,36 +222,35 @@ def render_pdf_page(pdf_path: Path, page_number: int, bounding_box: dict) -> str # in gremo na naslednjega. # - Ko imamo veljaven HTML ga vstavimo v glavno predlogo index.html. @app.get("/", response_class=HTMLResponse) -def home(): - # Če je seznam prazen takoj prikažemo no_qa. - if not qa_data: - return HTMLResponse(no_qa_template.render()) - - # Poiščemo prvi element, ki se uspešno izriše. - idx = 0 - partial_html = "" - while idx < len(qa_data): - partial_html = render_qa_partial(idx, edit_mode=False) - if partial_html: - break - # Če renderiranje ni uspelo to zabeležimo in odstranimo izbrani element - # iz seznama. - logging.info(f"Skipping broken QA at index {idx}") - qa_data.pop(idx) - - # Če se seznam izprazni zaradi preskakovanja neveljavnih parov vprašanj in - # odgovorov prav tako prikaži no_qa. - if not qa_data: - return HTMLResponse(no_qa_template.render()) - - # HTML delno predlogo vstavimo v celotno predlogo. - final_html = index_template.render(qa_content=partial_html) +async def home(request: Request): + if "email" not in request.session: + return HTMLResponse('') + + async with qa_lock: + if not qa_data: + return HTMLResponse(no_qa_template.render()) + + idx = 0 + partial_html = "" + while idx < len(qa_data): + partial_html = render_qa_partial(idx, edit_mode=False) + if partial_html: + break + logging.info(f"Skipping broken QA at index {idx}") + qa_data.pop(idx) + + if not qa_data: + return HTMLResponse(no_qa_template.render()) + + final_html = index_template.render(qa_content=partial_html) return HTMLResponse(final_html) # HTTP pot, ki prikaže zahvalo, ko uporabnik pregleda vse pare # vprašanj in odgovorov. @app.get("/thank-you", response_class=HTMLResponse) -def thank_you(): +async def thank_you(request: Request): + if "email" not in request.session: + return HTMLResponse('') return HTMLResponse(thank_you_template.render()) # Funkcija vrne HTML fragment (brez in
), ki vsebuje: @@ -292,26 +307,30 @@ def render_qa_partial(index: int, edit_mode: bool) -> str: # ne najdemo takšnega, ki ga lahko prikažemo. Ko zmanjka elementov, uporabnika # preusmerimo na thank-you. @app.get("/edit_qa", response_class=HTMLResponse) -def edit_qa(index: int): - while index < len(qa_data): - partial = render_qa_partial(index, edit_mode=True) - if partial: - return HTMLResponse(partial) - logging.info(f"Skipping broken QA at index {index}") - qa_data.pop(index) - +async def edit_qa(request: Request, index: int): + if "email" not in request.session: + return HTMLResponse('') + async with qa_lock: + while index < len(qa_data): + partial = render_qa_partial(index, edit_mode=True) + if partial: + return HTMLResponse(partial) + logging.info(f"Skipping broken QA at index {index}") + qa_data.pop(index) return HTMLResponse('') # Deluje podobno kot pot /edit_qa, le da prikliče predlogo v načinu samo za branje. @app.get("/display_qa", response_class=HTMLResponse) -def display_qa(index: int): - while index < len(qa_data): - partial = render_qa_partial(index, edit_mode=False) - if partial: - return HTMLResponse(partial) - logging.info(f"Skipping broken QA at index {index}") - qa_data.pop(index) - +async def display_qa(request: Request, index: int): + if "email" not in request.session: + return HTMLResponse('') + async with qa_lock: + while index < len(qa_data): + partial = render_qa_partial(index, edit_mode=False) + if partial: + return HTMLResponse(partial) + logging.info(f"Skipping broken QA at index {index}") + qa_data.pop(index) return HTMLResponse('') # Procesiramo uporabnikovo evaluacijo para vprašanje-odgovor: @@ -328,73 +347,70 @@ def display_qa(index: int): # - Ko je element obdelan ga iz qa_data odstranimo. # - Če ni več elementov prikažemo thank-you, sicer poiščemo naslednjega. @app.post("/evaluate", response_class=HTMLResponse) -def evaluate( - # Indeks para, ki ga ocenjujemo. +async def evaluate( + request: Request, index: int = Form(...), - # Vrednosti: "skip", "adequate", "inadequate" ali "corrected". evaluation: str = Form(...), - # Popravljeno vprašanje (če evaluation == "corrected"). correctedQuestion: str = Form(None), - # Popravljen odgovor (če evaluation == "corrected"). correctedAnswer: str = Form(None) ): - # Preverimo ali indeks obstaja. - if index < 0 or index >= len(qa_data): - return HTMLResponse('') + if "email" not in request.session: + return HTMLResponse('') - item = qa_data[index] + async with qa_lock: + if index < 0 or index >= len(qa_data): + return HTMLResponse('') + + item = qa_data[index] + current_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") - # Pripravimo timestamp. - current_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") - - record = { - "question": item["question"], - "answer": item["answer"], - "text": item["text"], - "chunkID": item["chunkID"], - "fileUrl": item["fileUrl"], - "fileS3Path": item["fileS3Path"], - "fileName": item["fileName"], - "evaluation": None, - "correctedQuestion": None, - "correctedAnswer": None, - "skipped": False, - "timestamp": current_timestamp - } - - # Nastavimo ustrezna polja glede na izbrane parametre. - if evaluation == "skip": - record["evaluation"] = None - record["skipped"] = True - elif evaluation == "adequate": - record["evaluation"] = "adequate" - elif evaluation == "inadequate": - record["evaluation"] = "inadequate" - elif evaluation == "corrected": - record["evaluation"] = "corrected" - record["correctedQuestion"] = correctedQuestion - record["correctedAnswer"] = correctedAnswer - - # Preberemo obstoječo vsebino datoteke feedback.json in vanjo dodamo nov vnos. - existing = json.loads(feedback_path.read_text(encoding="utf-8")) - existing.append(record) - feedback_path.write_text(json.dumps(existing, indent=2, ensure_ascii=False), encoding="utf-8") - - # Obdelani par vprašanje-odgovor odstranimo iz spomina. - qa_data.pop(index) - - # Če ni več neobdelanih vprašanj prikažemo thank-you. - if not qa_data: - return HTMLResponse('') - - # V nasprotnem primeru poiščemo naslednjega, ki se uspešno izriše. - next_idx = index - while next_idx < len(qa_data): - partial = render_qa_partial(next_idx, edit_mode=False) - if partial: - return HTMLResponse(partial) - logging.info(f"Skipping broken QA at index {next_idx}") - qa_data.pop(next_idx) - - # Če se je seznam izpraznil prikažemo thank-you. - return HTMLResponse('') \ No newline at end of file + record = { + "question": item["question"], + "answer": item["answer"], + "text": item["text"], + "chunkID": item["chunkID"], + "fileUrl": item["fileUrl"], + "fileS3Path": item["fileS3Path"], + "fileName": item["fileName"], + "evaluation": None, + "correctedQuestion": None, + "correctedAnswer": None, + "skipped": False, + "timestamp": current_timestamp, + "userEmail": request.session.get("email") + } + + if evaluation == "skip": + record["evaluation"] = None + record["skipped"] = True + elif evaluation == "adequate": + record["evaluation"] = "adequate" + elif evaluation == "inadequate": + record["evaluation"] = "inadequate" + elif evaluation == "corrected": + record["evaluation"] = "corrected" + record["correctedQuestion"] = correctedQuestion + record["correctedAnswer"] = correctedAnswer + + with feedback_lock: + existing = json.loads(feedback_path.read_text(encoding="utf-8")) + existing.append(record) + feedback_path.write_text( + json.dumps(existing, indent=2, ensure_ascii=False), + encoding="utf-8" + ) + + qa_data.pop(index) + + if not qa_data: + return HTMLResponse('') + + next_idx = index + while next_idx < len(qa_data): + partial = render_qa_partial(next_idx, edit_mode=False) + if partial: + return HTMLResponse(partial) + logging.info(f"Skipping broken QA at index {next_idx}") + qa_data.pop(next_idx) + + return HTMLResponse('') diff --git a/requirements.txt b/requirements.txt index 7cc9519..b679f18 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ Pillow==11.2.1 pydantic==2.11.5 python-dotenv==1.1.0 Requests==2.32.3 +filelock==3.13.1 diff --git a/templates/login.html b/templates/login.html new file mode 100644 index 0000000..aa62a38 --- /dev/null +++ b/templates/login.html @@ -0,0 +1,58 @@ + + + + +
+