Skip to content

Commit

Permalink
fix: make enum serializable with human-readable value (#555)
Browse files: browse the repository at this point in the history
Signed-off-by: Michele Dolfi <[email protected]>
  • Loading branch information
dolfim-ibm authored Dec 10, 2024
1 parent eb30c4f commit a7df337
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 28 deletions.
18 changes: 2 additions & 16 deletions docling/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
OcrEngine,
OcrMacOptions,
OcrOptions,
PdfBackend,
PdfPipelineOptions,
RapidOcrOptions,
TableFormerMode,
Expand Down Expand Up @@ -68,22 +70,6 @@ def version_callback(value: bool):
raise typer.Exit()


# Define an enum for the backend options
class PdfBackend(str, Enum):
    """Enum of valid PDF backends.

    The ``str`` mix-in makes each member compare equal to its plain
    string value, so a member can be looked up from that string with
    ``PdfBackend("pypdfium2")``.
    """

    PYPDFIUM2 = "pypdfium2"
    DLPARSE_V1 = "dlparse_v1"
    DLPARSE_V2 = "dlparse_v2"


# Define an enum for the ocr engines
class OcrEngine(str, Enum):
    """Enum of valid OCR engines.

    The ``str`` mix-in makes each member compare equal to its plain
    string value, so a member can be looked up from that string with
    ``OcrEngine("easyocr")``.
    """

    EASYOCR = "easyocr"
    TESSERACT_CLI = "tesseract_cli"
    TESSERACT = "tesseract"
    OCRMAC = "ocrmac"
    RAPIDOCR = "rapidocr"


def export_documents(
conv_results: Iterable[ConversionResult],
output_dir: Path,
Expand Down
24 changes: 12 additions & 12 deletions docling/datamodel/base_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@


class ConversionStatus(str, Enum):
    """Enum of conversion status values.

    Each value is spelled out as an explicit lowercase string instead of
    being generated with ``auto()``, so the serialized form of a member
    is human-readable. Every member name is assigned exactly once;
    assigning the same name twice in an Enum body raises ``TypeError``.
    """

    PENDING = "pending"
    STARTED = "started"
    FAILURE = "failure"
    SUCCESS = "success"
    PARTIAL_SUCCESS = "partial_success"
    SKIPPED = "skipped"


class InputFormat(str, Enum):
Expand Down Expand Up @@ -89,15 +89,15 @@ class OutputFormat(str, Enum):


class DocInputType(str, Enum):
    """Enum of document input types.

    Each value is an explicit lowercase string instead of an ``auto()``
    generated one, so the serialized form is human-readable. Every
    member name is assigned exactly once.
    """

    PATH = "path"
    STREAM = "stream"


class DoclingComponentType(str, Enum):
    """Enum of Docling component types.

    Each value is an explicit lowercase string instead of an ``auto()``
    generated one, so the serialized form is human-readable. Every
    member name is assigned exactly once.
    """

    DOCUMENT_BACKEND = "document_backend"
    MODEL = "model"
    DOC_ASSEMBLER = "doc_assembler"
    USER_INPUT = "user_input"


class ErrorItem(BaseModel):
Expand Down
20 changes: 20 additions & 0 deletions docling/datamodel/pipeline_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,26 @@ class OcrMacOptions(OcrOptions):
)


# Define an enum for the backend options
class PdfBackend(str, Enum):
    """Enum of valid PDF backends.

    The ``str`` mix-in makes each member compare equal to its plain
    string value, which is also the value used to look a member up,
    e.g. ``PdfBackend("dlparse_v2")``.
    """

    PYPDFIUM2 = "pypdfium2"
    DLPARSE_V1 = "dlparse_v1"
    DLPARSE_V2 = "dlparse_v2"


# Define an enum for the ocr engines
class OcrEngine(str, Enum):
    """Enum of valid OCR engines.

    The ``str`` mix-in makes each member compare equal to its plain
    string value, which is also the value used to look a member up,
    e.g. ``OcrEngine("rapidocr")``.
    """

    EASYOCR = "easyocr"
    TESSERACT_CLI = "tesseract_cli"
    TESSERACT = "tesseract"
    OCRMAC = "ocrmac"
    RAPIDOCR = "rapidocr"


class PipelineOptions(BaseModel):
"""Base pipeline options."""

Expand Down

0 comments on commit a7df337

Please sign in to comment.