Skip to content

Commit b68912c

Browse files
committed
Parse model output using pre-determined keys.
1 parent cf9508e commit b68912c

File tree

5 files changed

+54
-4
lines changed

5 files changed

+54
-4
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
venv/
1+
venv*
22
*.pyc
33
*.pyo
44
*.pyd

backend/src/routes/transcription-route.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ router.post("/", auth, async (req, res) => {
1212
}
1313

1414
const imageFile = req.files.image;
15+
// const keyFile = req.files.keys; // TODO: implement ability to send key file
1516
const formData = new FormData();
1617
formData.append("image", imageFile.data, imageFile.name);
1718

1819
const response = await axios.post(
19-
"http://localhost:5000/transcribe",
20+
"http://127.0.0.1:5000/transcribe", //TODO: change this endpoint after deploying
2021
formData,
2122
{
2223
headers: {

transcription/app.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from transformers import AutoProcessor, AutoModelForCausalLM
55
import torch
66

7+
from transcription import load_keys, parse_florence_output
8+
79
app = Flask(__name__)
810
CORS(app)
911

@@ -16,6 +18,7 @@
1618

1719
@app.route("/api/transcribe", methods=["POST"])
1820
def transcribe():
21+
print("START OF ENDPOINT")
1922
if "image" not in request.files:
2023
return jsonify({"error": "No image file provided"}), 400
2124

@@ -33,8 +36,9 @@ def transcribe():
3336
do_sample=False
3437
)
3538
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
36-
37-
return jsonify({"transcription": generated_text})
39+
keys = load_keys("keys.json")
40+
json_result = parse_florence_output(generated_text, keys)
41+
return json_result
3842
except Exception as e:
3943
return jsonify({"error": str(e)}), 500
4044

transcription/keys.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"keys": [
3+
"Case No.",
4+
"Patient ID",
5+
"Type",
6+
"Surgeon",
7+
"OR Date",
8+
"Age",
9+
"M/F",
10+
"Indication for Surgery/Reason for Referral",
11+
"HPI",
12+
"Meds",
13+
"Allergies",
14+
"ID",
15+
"PMHx",
16+
"Social"
17+
]
18+
}

transcription/transcription.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import json
2+
import re
3+
4+
5+
def load_keys(filePath):
    """Load the list of field labels to extract from a JSON key file.

    Args:
        filePath: Path to a JSON file whose top-level object has a
            "keys" entry (see keys.json).

    Returns:
        The value stored under "keys" in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the top-level object has no "keys" entry.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale-derived default encoding when reading it.
    with open(filePath, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data['keys']
9+
10+
def parse_florence_output(output, keys):
    """Extract "label: value" pairs for the given labels from model output.

    The text is scanned once, left to right, for any of the labels
    immediately followed by a colon. Each value is the text between one
    label and the next recognised label (or the end of the text), with
    surrounding whitespace stripped. When a label occurs more than once,
    the first occurrence wins.

    Args:
        output: The model output as a string. A dict is also accepted and
            is serialised to a JSON string before being scanned.
        keys: Iterable of label strings to look for (without the colon).

    Returns:
        A JSON-formatted string (indent=4) mapping every label that was
        found to its value, in the order the labels appear in ``keys``.
        Labels that are not present in the text are omitted.
    """
    if isinstance(output, dict):
        output = json.dumps(output)  # convert to JSON-formatted string

    # De-duplicate while keeping the caller's order for the final result.
    wanted = list(dict.fromkeys(keys))

    found = {}
    if wanted:
        # Longest labels first so that, at a given position, "Patient ID"
        # is preferred over any shorter label. Scanning left to right
        # then consumes the whole "Patient ID:" label, so the shorter
        # "ID" label cannot match inside it.
        longest_first = sorted(wanted, key=len, reverse=True)
        alternation = '|'.join(map(re.escape, longest_first))
        # Requiring the colon keeps a label name that merely appears inside
        # a value (e.g. "Type 2 diabetes") from ending that value early.
        label_pattern = re.compile(f"({alternation}):")

        matches = list(label_pattern.finditer(output))
        for index, match in enumerate(matches):
            label = match.group(1)
            if label in found:
                continue  # keep the first occurrence only
            if index + 1 < len(matches):
                value_end = matches[index + 1].start()
            else:
                value_end = len(output)
            found[label] = output[match.end():value_end].strip()

    # Emit in the order of ``keys`` so the result layout is stable.
    parsed_data = {key: found[key] for key in wanted if key in found}

    # Convert the parsed data to JSON format
    return json.dumps(parsed_data, indent=4)

0 commit comments

Comments
 (0)