use actual types in request models and refactor

Signed-off-by: Michele Dolfi <[email protected]>
DS4SD · dolfim-ibm · Feb 3, 2025 · Dec 10, 2024 · Jan 24, 2025 · Jan 24, 2025
commit 1f17348eba5d92f8d9cfb8be15d5436a33f6a56d
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,6 +16,14 @@ repos:
         pass_filenames: false
         language: system
         files: '\.py$'
+  - repo: local
+    hooks:
+      - id: autoflake
+        name: autoflake
+        entry: poetry run autoflake docling_serve tests
+        pass_filenames: false
+        language: system
+        files: '\.py$'
   - repo: local
     hooks:
       - id: system

diff --git a/docling_serve/app.py b/docling_serve/app.py
@@ -4,27 +4,27 @@
 import tempfile
 from contextlib import asynccontextmanager
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Annotated, List
 
 import gradio as gr
 from docling.datamodel.base_models import InputFormat
 from docling.document_converter import DocumentConverter
-from docling_conversion import (
+from dotenv import load_dotenv
+from fastapi import FastAPI, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import RedirectResponse
+from pydantic import BaseModel
+
+from docling_serve.docling_conversion import (
     ConvertDocumentsParameters,
     ConvertDocumentsRequest,
     convert_documents,
     converters,
     get_pdf_pipeline_opts,
 )
-from dotenv import load_dotenv
-from fastapi import Depends, FastAPI, File, Form, UploadFile
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import RedirectResponse
-from gradio_ui import ui as gradio_ui
-from helper_functions import _str_to_bool, _to_list_of_strings
-from pydantic import BaseModel
-from response_preparation import process_results
-from uvicorn import run
+from docling_serve.gradio_ui import ui as gradio_ui
+from docling_serve.helper_functions import FormDepends, _str_to_bool
+from docling_serve.response_preparation import process_results
 
 # Load local env vars if present
 load_dotenv()
@@ -160,45 +160,12 @@ def process_url(conversion_request: ConvertDocumentsRequest):
 # Convert a document from file(s)
 
 
-# Parameters parser: Form object needed for the file(s) conversion endpoint
-def _parse_parameters(
-    from_formats: Optional[Union[List[str], str]] = Form(["pdf", "docx"]),
-    to_formats: Optional[Union[List[str], str]] = Form(["md"]),
-    image_export_mode: Optional[str] = Form("embedded"),
-    do_ocr: Optional[Union[bool, str]] = Form("true"),
-    force_ocr: Optional[Union[bool, str]] = Form("false"),
-    ocr_engine: Optional[str] = Form("easyocr"),
-    ocr_lang: Optional[str] = Form("en"),
-    pdf_backend: Optional[str] = Form("dlparse_v2"),
-    table_mode: Optional[str] = Form("fast"),
-    abort_on_error: Optional[Union[bool, str]] = Form("false"),
-    return_as_file: Optional[Union[bool, str]] = Form("false"),
-    do_table_structure: Optional[Union[bool, str]] = Form("true"),
-    include_images: Optional[Union[bool, str]] = Form("true"),
-    images_scale: Optional[float] = Form(2.0),
-) -> ConvertDocumentsParameters:
-    return ConvertDocumentsParameters(
-        from_formats=_to_list_of_strings(from_formats) if from_formats else None,
-        to_formats=_to_list_of_strings(to_formats) if to_formats else None,
-        image_export_mode=image_export_mode.strip() if image_export_mode else None,
-        ocr=_str_to_bool(do_ocr),
-        force_ocr=_str_to_bool(force_ocr),
-        ocr_engine=ocr_engine.strip() if ocr_engine else None,
-        ocr_lang=ocr_lang.strip() if ocr_lang else None,
-        pdf_backend=pdf_backend.strip() if pdf_backend else None,
-        table_mode=table_mode.strip() if table_mode else None,
-        abort_on_error=_str_to_bool(abort_on_error),
-        return_as_file=_str_to_bool(return_as_file),
-        do_table_structure=_str_to_bool(do_table_structure),
-        include_images=_str_to_bool(include_images),
-        images_scale=images_scale,
-    )
-
-
 @app.post("/v1alpha/convert/file")
 async def process_file(
-    files: List[UploadFile] = File(...),
-    parameters: ConvertDocumentsParameters = Depends(_parse_parameters),
+    files: List[UploadFile],
+    parameters: Annotated[
+        ConvertDocumentsParameters, FormDepends(ConvertDocumentsParameters)
+    ],
 ):
 
     _log.info(f"Received {len(files)} files for processing.")
@@ -207,6 +174,8 @@ async def process_file(
     tmp_input_dir = Path(tempfile.mkdtemp())
 
     # Save the uploaded files to the temporary directory
+    # TODO: we could pass the binary stream to Docling directly; however, writing
+    # files to disk could help when many jobs are queued with background tasks.
     file_paths = []
     for file in files:
         file_location = tmp_input_dir / file.filename  # type: ignore [operator]
@@ -216,18 +185,7 @@ async def process_file(
 
     # Process the files
     conversion_request = ConvertDocumentsRequest(
-        input_sources=file_paths,
-        from_formats=parameters.from_formats,
-        to_formats=parameters.to_formats,
-        image_export_mode=parameters.image_export_mode,
-        ocr=parameters.do_ocr,
-        force_ocr=parameters.force_ocr,
-        ocr_engine=parameters.ocr_engine,
-        ocr_lang=parameters.ocr_lang,
-        pdf_backend=parameters.pdf_backend,
-        table_mode=parameters.table_mode,
-        abort_on_error=parameters.abort_on_error,
-        return_as_file=parameters.return_as_file,
+        input_sources=file_paths, **parameters.model_dump()
     )
 
     results = convert_documents(conversion_request)
@@ -243,6 +201,8 @@ async def process_file(
 
 # Launch the FastAPI server
 if __name__ == "__main__":
+    from uvicorn import run
+
     port = int(os.getenv("PORT", "8080"))
     workers = int(os.getenv("UVICORN_WORKERS", "1"))
     reload = _str_to_bool(os.getenv("RELOAD", "False"))