Parse model output using pre-determined keys.

romansinkus · romansinkus · commit b68912ce5a48 · 2024-12-05T23:49:56.000-08:00
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,4 @@
-venv/
+venv*
 *.pyc
 *.pyo
 *.pyd
diff --git a/backend/src/routes/transcription-route.js b/backend/src/routes/transcription-route.js
@@ -12,11 +12,12 @@ router.post("/", auth, async (req, res) => {
     }
 
     const imageFile = req.files.image;
+    // const keyFile = req.files.keys; // TODO: implement ability to send key file
     const formData = new FormData();
     formData.append("image", imageFile.data, imageFile.name);
 
     const response = await axios.post(
-      "http://localhost:5000/transcribe",
+      "http://127.0.0.1:5000/transcribe", //TODO: change this endpoint after deploying
       formData,
       {
         headers: {
diff --git a/transcription/app.py b/transcription/app.py
@@ -4,6 +4,8 @@
 from transformers import AutoProcessor, AutoModelForCausalLM
 import torch
 
+from transcription import load_keys, parse_florence_output
+
 app = Flask(__name__)
 CORS(app)
 
@@ -16,6 +18,7 @@
 
 @app.route("/api/transcribe", methods=["POST"])
 def transcribe():
+    print("START OF ENDPOINT")
     if "image" not in request.files:
         return jsonify({"error": "No image file provided"}), 400
 
@@ -33,8 +36,9 @@ def transcribe():
             do_sample=False
         )
         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-
-        return jsonify({"transcription": generated_text})
+        keys = load_keys("keys.json")
+        json_result = parse_florence_output(generated_text, keys)
+        return json_result
     except Exception as e:
         return jsonify({"error": str(e)}), 500
 
diff --git a/transcription/keys.json b/transcription/keys.json
@@ -0,0 +1,18 @@
+{
+    "keys": [
+        "Case No.",
+        "Patient ID",
+        "Type",
+        "Surgeon",
+        "OR Date",
+        "Age",
+        "M/F",
+        "Indication for Surgery/Reason for Referral",
+        "HPI",
+        "Meds",
+        "Allergies",
+        "ID",
+        "PMHx",
+        "Social"
+    ]
+}
diff --git a/transcription/transcription.py b/transcription/transcription.py
@@ -0,0 +1,27 @@
+import json
+import re
+
+
+def load_keys(filePath):
+    """Return the "keys" list from the JSON file at *filePath* (KeyError if the entry is absent)."""
+    with open(filePath, 'r', encoding='utf-8') as file:  # JSON is UTF-8; do not rely on the locale
+        return json.load(file)['keys']
+
+def parse_florence_output(output, keys):
+    """Extract "label: value" fields from model output.
+
+    Args: output -- text (a dict is serialized to JSON first); keys -- labels to look for.
+    Returns: JSON string (indent=4) of the labels found, in the order given by *keys*.
+    """
+    if isinstance(output, dict):
+        output = json.dumps(output)  # convert to JSON-formatted string
+    if not keys:
+        return json.dumps({}, indent=4)  # an empty alternation would match every ":"
+    # One alternation, longest label first: "Patient ID:" is consumed whole, so the
+    # shorter "ID" cannot match inside it; only "label:" (with its colon) ends a value.
+    labels = "|".join(map(re.escape, sorted(keys, key=len, reverse=True)))
+    hits = list(re.finditer(rf"(?<!\w)({labels}):", output))
+    found = {}
+    for hit, nxt in zip(hits, hits[1:] + [None]):  # first occurrence of a label wins
+        found.setdefault(hit.group(1), output[hit.end():nxt.start() if nxt else None].strip())
+    return json.dumps({k: found[k] for k in keys if k in found}, indent=4)

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-venv/`
	`1`	`+venv*`
`2`	`2`	`*.pyc`
`3`	`3`	`*.pyo`
`4`	`4`	`*.pyd`
Original file line number	Diff line number	Diff line change
`@@ -12,11 +12,12 @@ router.post("/", auth, async (req, res) => {`
`12`	`12`	`}`
`13`	`13`
`14`	`14`	`const imageFile = req.files.image;`
	`15`	`+ // const keyFile = req.files.keys; // TODO: implement ability to send key file`
`15`	`16`	`const formData = new FormData();`
`16`	`17`	`formData.append("image", imageFile.data, imageFile.name);`
`17`	`18`
`18`	`19`	`const response = await axios.post(`
`19`		`- "http://localhost:5000/transcribe",`
	`20`	`+ "http://127.0.0.1:5000/transcribe", //TODO: change this endpoint after deploying`
`20`	`21`	`formData,`
`21`	`22`	`{`
`22`	`23`	`headers: {`