Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Sistem implementira dvo-fazni pristop za ustvarjanje kakovostnih QA parov:

2. Namestite odvisnosti:
```bash
pip install fastapi uvicorn python-dotenv minio openai pydantic PyMuPDF pillow requests jinja2 filelock itsdangerous
```

3. Ustvarite `.env` datoteko z naslednjimi spremenljivkami:
Expand Down Expand Up @@ -78,9 +78,10 @@ Zaženite spletni strežnik:
uvicorn app:app --reload --host 0.0.0.0 --port 8000
```

Pregledovalec QA parov lahko sedaj odpre brskalnik na http://localhost:8000 in:
Pregledovalec QA parov najprej v brskalniku odpre http://localhost:8000/login,
se prijavi z e-naslovom in nato dostopa do http://localhost:8000, kjer lahko:

- Pregleda prikazane pare vprašanj in odgovorov.
- Jih označi kot "Ustrezen", "Neustrezen" ali "Preskoči".
- Po potrebi uredi in popravi njihovo vsebino.
- Povratne informacije se shranjujejo v app_data/feedback.json.
226 changes: 121 additions & 105 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@

# Web server that serves the web page and receives/returns data.
from fastapi import FastAPI, Form
from fastapi import FastAPI, Form, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from starlette.middleware.sessions import SessionMiddleware
# Delo z datotečnimi potmi.
from pathlib import Path
# Shranjevanje in branje podatkov v JSON formatu.
Expand All @@ -30,9 +31,12 @@
from datetime import datetime
# Beleženje dogodkov in napak.
import logging
import asyncio
from filelock import FileLock

# Create the FastAPI application instance.
app = FastAPI()

# Session cookies are signed with this key, so it must not be a value that is
# published in the repository. Read it from the environment; the previous
# literal is kept only as a fallback so existing setups keep starting, and a
# warning is logged whenever that fallback is used.
# NOTE(review): if the key is stored in .env, confirm load_dotenv() has already
# run before this line.
import os

_session_secret = os.getenv("SESSION_SECRET_KEY")
if not _session_secret:
    logging.warning(
        "SESSION_SECRET_KEY is not set; falling back to the insecure default "
        "session key."
    )
    _session_secret = "change-me"
app.add_middleware(SessionMiddleware, secret_key=_session_secret)

# Nastavimo, da se vsebina mape "static" streže pod URLjem /static.
app.mount("/static", StaticFiles(directory="static"), name="static")
Expand Down Expand Up @@ -68,6 +72,8 @@
})
# Final list of question-answer pairs, together with their metadata.
qa_data = flattened_data
# Taken by the async route handlers (async with qa_lock) around every read or
# pop of qa_data.
qa_lock = asyncio.Lock()
# File lock stored next to the feedback file (its path plus ".lock"); held
# while the feedback file is read, extended and written back.
feedback_lock = FileLock(str(feedback_path) + ".lock")

# Iz diska naložimo HTML predloge, ki so v obliki samostojnih strani:
# Prikaže glavno stran z enim parom vprašanje-odgovor.
Expand All @@ -76,6 +82,7 @@
no_qa_template = Template(Path("templates/no_qa.html").read_text(encoding="utf-8"))
# Thank-you page shown to the user for taking part.
thank_you_template = Template(Path("templates/thank_you.html").read_text(encoding="utf-8"))
# Login page containing the e-mail form that posts to /login.
login_template = Template(Path("templates/login.html").read_text(encoding="utf-8"))

# Dodatne predloge, ki vsebuje samo del strani:
# Prikaže sliko strani PDF dokumenta, skupaj s parom vprašanje-odgovor in gumbi
Expand Down Expand Up @@ -198,6 +205,15 @@ def render_pdf_page(pdf_path: Path, page_number: int, bounding_box: dict) -> str

return str(img_path)

@app.get("/login", response_class=HTMLResponse)
async def login_page():
    """Serve the login form rendered from the login template."""
    page_html = login_template.render()
    return HTMLResponse(page_html)

@app.post("/login", response_class=HTMLResponse)
async def login(request: Request, email: str = Form(...)):
    """Remember the reviewer's e-mail in the session and send them to "/".

    Args:
        request: Incoming request; its session receives the "email" entry.
        email: Value of the "email" form field.

    Returns:
        An HTML response that redirects the browser to "/" on success, or the
        login page again with status 400 when the value does not look like an
        e-mail address.
    """
    cleaned = email.strip()

    # The type="email" check in the form runs only in the browser, and the
    # stored value is later written into feedback records, so check the basic
    # shape here as well: non-empty parts around "@" and no whitespace.
    local_part, at_sign, domain = cleaned.partition("@")
    has_whitespace = any(ch.isspace() for ch in cleaned)
    if not (at_sign and local_part and domain) or has_whitespace:
        return HTMLResponse(login_template.render(), status_code=400)

    request.session["email"] = cleaned
    return HTMLResponse('<script>window.location.href="/";</script>')

# Definicija glavne, t.i. home HTTP poti (ang. route):
# - Če qa_data ne vsebuje nobenih parov vprašanj in odgovorov, prikažemo stran
# 'no_qa'.
Expand All @@ -206,36 +222,35 @@ def render_pdf_page(pdf_path: Path, page_number: int, bounding_box: dict) -> str
# in gremo na naslednjega.
# - Ko imamo veljaven HTML ga vstavimo v glavno predlogo index.html.
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the main page with the first question-answer pair that renders.

    Users without an "email" entry in the session are redirected to /login.
    Pairs at the head of qa_data whose partial fails to render are logged and
    removed; when no pair is left the 'no_qa' page is returned instead.
    """
    if "email" not in request.session:
        return HTMLResponse('<script>window.location.href="/login";</script>')

    async with qa_lock:
        head = 0
        partial_html = ""
        # Broken entries are popped from the front, so the candidate is always
        # the element at position 0 until one renders or the list runs dry.
        while qa_data:
            partial_html = render_qa_partial(head, edit_mode=False)
            if partial_html:
                break
            logging.info(f"Skipping broken QA at index {head}")
            qa_data.pop(head)

        # Covers both an initially empty list and one emptied by the loop.
        if not qa_data:
            return HTMLResponse(no_qa_template.render())

        # Embed the partial into the full page template.
        final_html = index_template.render(qa_content=partial_html)
    return HTMLResponse(final_html)

# HTTP pot, ki prikaže zahvalo, ko uporabnik pregleda vse pare
# vprašanj in odgovorov.
@app.get("/thank-you", response_class=HTMLResponse)
async def thank_you(request: Request):
    """Show the thank-you page, or redirect to /login without a session e-mail."""
    logged_in = "email" in request.session
    if logged_in:
        return HTMLResponse(thank_you_template.render())
    return HTMLResponse('<script>window.location.href="/login";</script>')

# Funkcija vrne HTML fragment (brez <html> in <body>), ki vsebuje:
Expand Down Expand Up @@ -292,26 +307,30 @@ def render_qa_partial(index: int, edit_mode: bool) -> str:
# ne najdemo takšnega, ki ga lahko prikažemo. Ko zmanjka elementov, uporabnika
# preusmerimo na thank-you.
@app.get("/edit_qa", response_class=HTMLResponse)
async def edit_qa(request: Request, index: int):
    """Return the edit-mode partial for the pair at ``index``.

    Users without an "email" entry in the session are redirected to /login.
    Pairs whose partial fails to render are logged and removed, and the pair
    that moves into the same position is tried next. When nothing is left at
    or after ``index`` the browser is sent to /thank-you.
    """
    if "email" not in request.session:
        return HTMLResponse('<script>window.location.href="/login";</script>')

    async with qa_lock:
        while len(qa_data) > index:
            fragment = render_qa_partial(index, edit_mode=True)
            if not fragment:
                logging.info(f"Skipping broken QA at index {index}")
                qa_data.pop(index)
                continue
            return HTMLResponse(fragment)
    return HTMLResponse('<script>window.location.href="/thank-you";</script>')

# Deluje podobno kot pot /edit_qa, le da prikliče predlogo v načinu samo za branje.
@app.get("/display_qa", response_class=HTMLResponse)
async def display_qa(request: Request, index: int):
    """Return the read-only partial for the pair at ``index``.

    Users without an "email" entry in the session are redirected to /login.
    Pairs whose partial fails to render are logged and removed, and the pair
    that moves into the same position is tried next. When nothing is left at
    or after ``index`` the browser is sent to /thank-you.
    """
    if "email" not in request.session:
        return HTMLResponse('<script>window.location.href="/login";</script>')

    async with qa_lock:
        while len(qa_data) > index:
            fragment = render_qa_partial(index, edit_mode=False)
            if not fragment:
                logging.info(f"Skipping broken QA at index {index}")
                qa_data.pop(index)
                continue
            return HTMLResponse(fragment)
    return HTMLResponse('<script>window.location.href="/thank-you";</script>')

# Procesiramo uporabnikovo evaluacijo para vprašanje-odgovor:
Expand All @@ -328,73 +347,70 @@ def display_qa(index: int):
# - Ko je element obdelan ga iz qa_data odstranimo.
# - Če ni več elementov prikažemo thank-you, sicer poiščemo naslednjega.
# Evaluation values accepted from the form.
_KNOWN_EVALUATIONS = ("skip", "adequate", "inadequate", "corrected")


def _append_feedback(record: dict) -> None:
    """Append one record to the JSON list stored at feedback_path.

    The file lock is held across the whole read-modify-write. This function
    blocks, so async callers should run it in a worker thread.
    """
    with feedback_lock:
        existing = json.loads(feedback_path.read_text(encoding="utf-8"))
        existing.append(record)
        feedback_path.write_text(
            json.dumps(existing, indent=2, ensure_ascii=False),
            encoding="utf-8"
        )


@app.post("/evaluate", response_class=HTMLResponse)
async def evaluate(
    request: Request,
    index: int = Form(...),
    evaluation: str = Form(...),
    correctedQuestion: str = Form(None),
    correctedAnswer: str = Form(None)
):
    """Record the reviewer's verdict for one pair and return the next pair.

    Args:
        request: Incoming request; its session must contain "email".
        index: Position in qa_data of the pair being evaluated.
        evaluation: One of "skip", "adequate", "inadequate" or "corrected".
        correctedQuestion: Edited question, stored when evaluation is
            "corrected".
        correctedAnswer: Edited answer, stored when evaluation is "corrected".

    Returns:
        The read-only partial of the next renderable pair; a redirect to
        /thank-you when the index is out of range or no pairs remain; a
        redirect to /login when the session has no e-mail; or a 400 response
        when ``evaluation`` is not a known value.
    """
    if "email" not in request.session:
        return HTMLResponse('<script>window.location.href="/login";</script>')

    # An unknown value used to be stored as a record with no verdict while the
    # pair was still removed from the queue; refuse it before touching state.
    if evaluation not in _KNOWN_EVALUATIONS:
        return HTMLResponse("Unknown evaluation value.", status_code=400)

    async with qa_lock:
        if index < 0 or index >= len(qa_data):
            return HTMLResponse('<script>window.location.href="/thank-you";</script>')

        item = qa_data[index]
        current_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")

        record = {
            "question": item["question"],
            "answer": item["answer"],
            "text": item["text"],
            "chunkID": item["chunkID"],
            "fileUrl": item["fileUrl"],
            "fileS3Path": item["fileS3Path"],
            "fileName": item["fileName"],
            "evaluation": None,
            "correctedQuestion": None,
            "correctedAnswer": None,
            "skipped": False,
            "timestamp": current_timestamp,
            "userEmail": request.session.get("email")
        }

        if evaluation == "skip":
            record["skipped"] = True
        else:
            record["evaluation"] = evaluation
            if evaluation == "corrected":
                record["correctedQuestion"] = correctedQuestion
                record["correctedAnswer"] = correctedAnswer

        # Acquiring the file lock and doing the file I/O both block, so run
        # them in a worker thread instead of stalling the event loop. qa_lock
        # stays held so the pair is removed only after its record is stored.
        await asyncio.to_thread(_append_feedback, record)

        # The processed pair leaves the in-memory queue.
        qa_data.pop(index)

        if not qa_data:
            return HTMLResponse('<script>window.location.href="/thank-you";</script>')

        # The following pair has moved into the same position; skip over any
        # that fail to render.
        next_idx = index
        while next_idx < len(qa_data):
            partial = render_qa_partial(next_idx, edit_mode=False)
            if partial:
                return HTMLResponse(partial)
            logging.info(f"Skipping broken QA at index {next_idx}")
            qa_data.pop(next_idx)

    return HTMLResponse('<script>window.location.href="/thank-you";</script>')
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ Pillow==11.2.1
pydantic==2.11.5
python-dotenv==1.1.0
Requests==2.32.3
filelock==3.13.1
itsdangerous==2.2.0
58 changes: 58 additions & 0 deletions templates/login.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<!DOCTYPE html>
<html lang="sl">
<head>
<meta charset="UTF-8">
<title>Prijava</title>
<link rel="icon" type="image/png" href="/assets/zrsvn_logo.png?t=12345">
<script src="https://unpkg.com/[email protected]"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css">
<link href="https://fonts.googleapis.com/css2?family=Open+Sans&display=swap" rel="stylesheet">
<style>
body {
font-family: 'Open Sans', sans-serif;
text-align: center;
margin: 0;
padding: 20px;
}
.header-container {
display: flex;
align-items: center;
justify-content: center;
padding: 20px;
}
.header-container img {
height: 50px;
margin-right: 15px;
vertical-align: middle;
}
.header-container h2 {
margin: 0;
font-size: 24px;
line-height: 1;
display: flex;
align-items: center;
}
#login-container {
display: inline-block;
text-align: left;
width: 60%;
max-width: 400px;
}
</style>
</head>
<body>
<div class="header-container">
<img src="/assets/zrsvn_logo.png" alt="ZRSVN Logo">
<h2>Vrednotenje parov vprašanj in odgovorov</h2>
</div>
<div id="login-container">
<h3>Vnesi svoj e-naslov</h3>
<form method="post" action="/login" class="mt-3">
<div class="mb-3">
<input type="email" name="email" class="form-control" required>
</div>
<button type="submit" class="btn btn-primary w-100">Prijava</button>
</form>
</div>
</body>
</html>