diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 0f11001..56a66d0 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -661,65 +661,7 @@ class CodeItem(TextItem): label: typing.Literal[DocItemLabel.CODE] = ( DocItemLabel.CODE # type: ignore[assignment] ) - code_language: typing.Literal[ - CodeLanguageLabel.ADA, - CodeLanguageLabel.AWK, - CodeLanguageLabel.BASH, - CodeLanguageLabel.C, - CodeLanguageLabel.C_SHARP, - CodeLanguageLabel.C_PLUS_PLUS, - CodeLanguageLabel.CMAKE, - CodeLanguageLabel.COBOL, - CodeLanguageLabel.CSS, - CodeLanguageLabel.CEYLON, - CodeLanguageLabel.CLOJURE, - CodeLanguageLabel.CRYSTAL, - CodeLanguageLabel.CUDA, - CodeLanguageLabel.CYTHON, - CodeLanguageLabel.D, - CodeLanguageLabel.DART, - CodeLanguageLabel.DOCKERFILE, - CodeLanguageLabel.ELIXIR, - CodeLanguageLabel.ERLANG, - CodeLanguageLabel.FORTRAN, - CodeLanguageLabel.FORTH, - CodeLanguageLabel.GO, - CodeLanguageLabel.HTML, - CodeLanguageLabel.HASKELL, - CodeLanguageLabel.HAXE, - CodeLanguageLabel.JAVA, - CodeLanguageLabel.JAVASCRIPT, - CodeLanguageLabel.JULIA, - CodeLanguageLabel.KOTLIN, - CodeLanguageLabel.LISP, - CodeLanguageLabel.LUA, - CodeLanguageLabel.MATLAB, - CodeLanguageLabel.MOONSCRIPT, - CodeLanguageLabel.NIM, - CodeLanguageLabel.OCAML, - CodeLanguageLabel.OBJECTIVEC, - CodeLanguageLabel.OCTAVE, - CodeLanguageLabel.PHP, - CodeLanguageLabel.PASCAL, - CodeLanguageLabel.PERL, - CodeLanguageLabel.PROLOG, - CodeLanguageLabel.PYTHON, - CodeLanguageLabel.RACKET, - CodeLanguageLabel.RUBY, - CodeLanguageLabel.RUST, - CodeLanguageLabel.SML, - CodeLanguageLabel.SQL, - CodeLanguageLabel.SCALA, - CodeLanguageLabel.SCHEME, - CodeLanguageLabel.SWIFT, - CodeLanguageLabel.TYPESCRIPT, - CodeLanguageLabel.VISUALBASIC, - CodeLanguageLabel.XML, - CodeLanguageLabel.YAML, - CodeLanguageLabel.BC, - CodeLanguageLabel.DC, - CodeLanguageLabel.UNKNOWN, - ] = CodeLanguageLabel.UNKNOWN + code_language: CodeLanguageLabel = CodeLanguageLabel.UNKNOWN class SectionHeaderItem(TextItem): diff --git a/docs/DoclingDocument.json b/docs/DoclingDocument.json index c51acce..6d3a9fa 100644 --- a/docs/DoclingDocument.json +++ b/docs/DoclingDocument.json @@ -213,68 +213,8 @@ "type": "string" }, "code_language": { - "default": "unknown", - "enum": [ - "Ada", - "Awk", - "Bash", - "C", - "C#", - "C++", - "CMake", - "COBOL", - "CSS", - "Ceylon", - "Clojure", - "Crystal", - "Cuda", - "Cython", - "D", - "Dart", - "Dockerfile", - "Elixir", - "Erlang", - "FORTRAN", - "Forth", - "Go", - "HTML", - "Haskell", - "Haxe", - "Java", - "JavaScript", - "Julia", - "Kotlin", - "Lisp", - "Lua", - "Matlab", - "MoonScript", - "Nim", - "OCaml", - "ObjectiveC", - "Octave", - "PHP", - "Pascal", - "Perl", - "Prolog", - "Python", - "Racket", - "Ruby", - "Rust", - "SML", - "SQL", - "Scala", - "Scheme", - "Swift", - "TypeScript", - "VisualBasic", - "XML", - "YAML", - "bc", - "dc", - "unknown" - ], - "title": "Code Language", - "type": "string" + "$ref": "#/$defs/CodeLanguageLabel", + "default": "unknown" } }, "required": [ @@ -285,6 +225,70 @@ "title": "CodeItem", "type": "object" }, + "CodeLanguageLabel": { + "description": "CodeLanguageLabel.", + "enum": [ + "Ada", + "Awk", + "Bash", + "C", + "C#", + "C++", + "CMake", + "COBOL", + "CSS", + "Ceylon", + "Clojure", + "Crystal", + "Cuda", + "Cython", + "D", + "Dart", + "Dockerfile", + "Elixir", + "Erlang", + "FORTRAN", + "Forth", + "Go", + "HTML", + "Haskell", + "Haxe", + "Java", + "JavaScript", + "Julia", + "Kotlin", + "Lisp", + "Lua", + "Matlab", + "MoonScript", + "Nim", + "OCaml", + "ObjectiveC", + "Octave", + "PHP", + "Pascal", + "Perl", + "Prolog", + "Python", + "Racket", + "Ruby", + "Rust", + "SML", + "SQL", + "Scala", + "Scheme", + "Swift", + "TypeScript", + "VisualBasic", + "XML", + "YAML", + "bc", + "dc", + "unknown" + ], + "title": "CodeLanguageLabel", + "type": "string" + }, "CoordOrigin": { "description": "CoordOrigin.", "enum": [