From bfeb2db24b70550693911af6aee01db8c74d464a Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Fri, 1 Nov 2024 13:41:16 +0100 Subject: [PATCH] fix: include titles to chunk heading metadata (#62) Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- .../chunker/hierarchical_chunker.py | 13 +- test/data/chunker/0_inp_dl_doc.json | 19 ++- test/data/chunker/0_out_chunks.json | 123 +++++++++++++++ test/data/chunker/1_out_chunks.json | 142 ++++++++++++++++++ 4 files changed, 289 insertions(+), 8 deletions(-) diff --git a/docling_core/transforms/chunker/hierarchical_chunker.py b/docling_core/transforms/chunker/hierarchical_chunker.py index fe558dc..dcfab85 100644 --- a/docling_core/transforms/chunker/hierarchical_chunker.py +++ b/docling_core/transforms/chunker/hierarchical_chunker.py @@ -183,14 +183,15 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: ) list_items = [] # reset - if isinstance( - item, SectionHeaderItem - ) or ( # TODO remove when all captured as SectionHeaderItem: + if isinstance(item, SectionHeaderItem) or ( isinstance(item, TextItem) - and item.label == DocItemLabel.SECTION_HEADER + and item.label in [DocItemLabel.SECTION_HEADER, DocItemLabel.TITLE] ): - # TODO second branch not needed once cleanup above complete: - level = item.level if isinstance(item, SectionHeaderItem) else 1 + level = ( + item.level + if isinstance(item, SectionHeaderItem) + else (0 if item.label == DocItemLabel.TITLE else 1) + ) heading_by_level[level] = item.text # remove headings of higher level as they just went out of scope diff --git a/test/data/chunker/0_inp_dl_doc.json b/test/data/chunker/0_inp_dl_doc.json index cff8414..724da23 100644 --- a/test/data/chunker/0_inp_dl_doc.json +++ b/test/data/chunker/0_inp_dl_doc.json @@ -547,7 +547,7 @@ "$ref": "#/body" }, "children": [], - "label": "section_header", + "label": "title", "prov": [ { "page_no": 1, @@ -652,7 +652,7 @@ }, "children": [], "label": "section_header", - "level": 2, + "level": 1, "prov": [ { "page_no": 1, @@ -705,6 +705,7 @@ }, "children": [], "label": "section_header", + "level": 1, "prov": [ { "page_no": 1, @@ -1017,6 +1018,7 @@ }, "children": [], "label": "section_header", + "level": 1, "prov": [ { "page_no": 2, @@ -1147,6 +1149,7 @@ }, "children": [], "label": "section_header", + "level": 1, "prov": [ { "page_no": 2, @@ -1199,6 +1202,7 @@ }, "children": [], "label": "section_header", + "level": 2, "prov": [ { "page_no": 2, @@ -1381,6 +1385,7 @@ }, "children": [], "label": "section_header", + "level": 2, "prov": [ { "page_no": 3, @@ -1433,6 +1438,7 @@ }, "children": [], "label": "section_header", + "level": 3, "prov": [ { "page_no": 3, @@ -1511,6 +1517,7 @@ }, "children": [], "label": "section_header", + "level": 3, "prov": [ { "page_no": 3, @@ -1615,6 +1622,7 @@ }, "children": [], "label": "section_header", + "level": 3, "prov": [ { "page_no": 4, @@ -1693,6 +1701,7 @@ }, "children": [], "label": "section_header", + "level": 2, "prov": [ { "page_no": 4, @@ -1745,6 +1754,7 @@ }, "children": [], "label": "section_header", + "level": 2, "prov": [ { "page_no": 4, @@ -1823,6 +1833,7 @@ }, "children": [], "label": "section_header", + "level": 1, "prov": [ { "page_no": 4, @@ -2005,6 +2016,7 @@ }, "children": [], "label": "section_header", + "level": 1, "prov": [ { "page_no": 5, @@ -2057,6 +2069,7 @@ }, "children": [], "label": "section_header", + "level": 1, "prov": [ { "page_no": 5, @@ -2135,6 +2148,7 @@ }, "children": [], "label": "section_header", + "level": 1, "prov": [ { "page_no": 5, @@ -2655,6 +2669,7 @@ }, "children": [], "label": "section_header", + "level": 1, "prov": [ { "page_no": 7, diff --git a/test/data/chunker/0_out_chunks.json b/test/data/chunker/0_out_chunks.json index 4c7bf36..750f12a 100644 --- a/test/data/chunker/0_out_chunks.json +++ b/test/data/chunker/0_out_chunks.json @@ -235,6 +235,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -276,6 +277,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -317,6 +319,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -358,6 +361,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -399,6 +403,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -560,6 +565,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -601,6 +607,7 @@ } ], "headings": [ + "Docling Technical Report", "2 Getting Started" ], "origin": { @@ -642,6 +649,7 @@ } ], "headings": [ + "Docling Technical Report", "2 Getting Started" ], "origin": { @@ -683,6 +691,7 @@ } ], "headings": [ + "Docling Technical Report", "2 Getting Started" ], "origin": { @@ -724,6 +733,7 @@ } ], "headings": [ + "Docling Technical Report", "2 Getting Started" ], "origin": { @@ -765,6 +775,7 @@ } ], "headings": [ + "Docling Technical Report", "3 Processing pipeline" ], "origin": { @@ -806,6 +817,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -847,6 +860,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -888,6 +903,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -929,6 +946,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -970,6 +989,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -1011,6 +1032,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -1052,6 +1075,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.2 AI models" ], "origin": { @@ -1093,6 +1118,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Layout Analysis Model" ], "origin": { @@ -1134,6 +1162,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Layout Analysis Model" ], "origin": { @@ -1175,6 +1206,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Table Structure Recognition" ], "origin": { @@ -1216,6 +1250,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Table Structure Recognition" ], "origin": { @@ -1257,6 +1294,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Table Structure Recognition" ], "origin": { @@ -1298,6 +1338,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "OCR" ], "origin": { @@ -1339,6 +1382,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "OCR" ], "origin": { @@ -1380,6 +1426,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.3 Assembly" ], "origin": { @@ -1421,6 +1469,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.4 Extensibility" ], "origin": { @@ -1462,6 +1512,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.4 Extensibility" ], "origin": { @@ -1503,6 +1555,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1544,6 +1597,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1585,6 +1639,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1626,6 +1681,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1667,6 +1723,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1708,6 +1765,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1749,6 +1807,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "captions": [ @@ -1793,6 +1852,7 @@ } ], "headings": [ + "Docling Technical Report", "5 Applications" ], "origin": { @@ -1834,6 +1894,7 @@ } ], "headings": [ + "Docling Technical Report", "6 Future work and contributions" ], "origin": { @@ -1875,6 +1936,7 @@ } ], "headings": [ + "Docling Technical Report", "6 Future work and contributions" ], "origin": { @@ -1940,6 +2002,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -1981,6 +2044,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2022,6 +2086,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2375,6 +2440,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2416,6 +2482,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2457,6 +2524,7 @@ } ], "headings": [ + "Docling Technical Report", "Appendix" ], "origin": { @@ -2498,6 +2566,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2539,6 +2608,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2580,6 +2650,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2621,6 +2692,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2662,6 +2734,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2703,6 +2776,7 @@ } ], "headings": [ + "Docling Technical Report", "ABSTRACT" ], "origin": { @@ -2744,6 +2818,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -2785,6 +2860,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -2826,6 +2902,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2867,6 +2944,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2908,6 +2986,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2949,6 +3028,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2990,6 +3070,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -3031,6 +3112,7 @@ } ], "headings": [ + "Docling Technical Report", "ABSTRACT" ], "origin": { @@ -3072,6 +3154,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3113,6 +3196,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3154,6 +3238,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3195,6 +3280,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3236,6 +3322,7 @@ } ], "headings": [ + "Docling Technical Report", "KEYWORDS" ], "origin": { @@ -3277,6 +3364,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3318,6 +3406,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3359,6 +3448,7 @@ } ], "headings": [ + "Docling Technical Report", "KEYWORDS" ], "origin": { @@ -3400,6 +3490,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3441,6 +3532,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3482,6 +3574,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3523,6 +3616,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3564,6 +3658,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3605,6 +3700,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3646,6 +3742,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3687,6 +3784,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "captions": [ @@ -3731,6 +3829,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3772,6 +3871,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -3813,6 +3913,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -3854,6 +3955,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -3895,6 +3997,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -3936,6 +4039,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -3977,6 +4081,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4018,6 +4123,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4059,6 +4165,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4100,6 +4207,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4141,6 +4249,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4182,6 +4291,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4223,6 +4333,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4264,6 +4375,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4305,6 +4417,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4346,6 +4459,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4387,6 +4501,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4428,6 +4543,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4469,6 +4585,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "captions": [ @@ -4513,6 +4630,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4554,6 +4672,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4595,6 +4714,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4636,6 +4756,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4677,6 +4798,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4718,6 +4840,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { diff --git a/test/data/chunker/1_out_chunks.json b/test/data/chunker/1_out_chunks.json index 066055e..2b141c9 100644 --- a/test/data/chunker/1_out_chunks.json +++ b/test/data/chunker/1_out_chunks.json @@ -235,6 +235,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -276,6 +277,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -317,6 +319,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -358,6 +361,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -399,6 +403,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -440,6 +445,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -481,6 +487,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -522,6 +529,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -563,6 +571,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -604,6 +613,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -645,6 +655,7 @@ } ], "headings": [ + "Docling Technical Report", "1 Introduction" ], "origin": { @@ -686,6 +697,7 @@ } ], "headings": [ + "Docling Technical Report", "2 Getting Started" ], "origin": { @@ -727,6 +739,7 @@ } ], "headings": [ + "Docling Technical Report", "2 Getting Started" ], "origin": { @@ -768,6 +781,7 @@ } ], "headings": [ + "Docling Technical Report", "2 Getting Started" ], "origin": { @@ -809,6 +823,7 @@ } ], "headings": [ + "Docling Technical Report", "2 Getting Started" ], "origin": { @@ -850,6 +865,7 @@ } ], "headings": [ + "Docling Technical Report", "3 Processing pipeline" ], "origin": { @@ -891,6 +907,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -932,6 +950,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -973,6 +993,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -1014,6 +1036,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -1055,6 +1079,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -1096,6 +1122,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.1 PDF backends" ], "origin": { @@ -1137,6 +1165,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.2 AI models" ], "origin": { @@ -1178,6 +1208,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Layout Analysis Model" ], "origin": { @@ -1219,6 +1252,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Layout Analysis Model" ], "origin": { @@ -1260,6 +1296,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Table Structure Recognition" ], "origin": { @@ -1301,6 +1340,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Table Structure Recognition" ], "origin": { @@ -1342,6 +1384,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "Table Structure Recognition" ], "origin": { @@ -1383,6 +1428,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "OCR" ], "origin": { @@ -1424,6 +1472,9 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", + "3.2 AI models", "OCR" ], "origin": { @@ -1465,6 +1516,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.3 Assembly" ], "origin": { @@ -1506,6 +1559,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.4 Extensibility" ], "origin": { @@ -1547,6 +1602,8 @@ } ], "headings": [ + "Docling Technical Report", + "3 Processing pipeline", "3.4 Extensibility" ], "origin": { @@ -1588,6 +1645,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1629,6 +1687,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1670,6 +1729,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1711,6 +1771,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1752,6 +1813,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1793,6 +1855,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "origin": { @@ -1834,6 +1897,7 @@ } ], "headings": [ + "Docling Technical Report", "4 Performance" ], "captions": [ @@ -1878,6 +1942,7 @@ } ], "headings": [ + "Docling Technical Report", "5 Applications" ], "origin": { @@ -1919,6 +1984,7 @@ } ], "headings": [ + "Docling Technical Report", "6 Future work and contributions" ], "origin": { @@ -1960,6 +2026,7 @@ } ], "headings": [ + "Docling Technical Report", "6 Future work and contributions" ], "origin": { @@ -2001,6 +2068,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2042,6 +2110,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2083,6 +2152,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2124,6 +2194,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2165,6 +2236,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2206,6 +2278,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2247,6 +2320,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2288,6 +2362,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2329,6 +2404,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2370,6 +2446,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2411,6 +2488,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2452,6 +2530,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2493,6 +2572,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2534,6 +2614,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2575,6 +2656,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2616,6 +2698,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2657,6 +2740,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2698,6 +2782,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2739,6 +2824,7 @@ } ], "headings": [ + "Docling Technical Report", "References" ], "origin": { @@ -2780,6 +2866,7 @@ } ], "headings": [ + "Docling Technical Report", "Appendix" ], "origin": { @@ -2821,6 +2908,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2862,6 +2950,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2903,6 +2992,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2944,6 +3034,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -2985,6 +3076,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -3026,6 +3118,7 @@ } ], "headings": [ + "Docling Technical Report", "ABSTRACT" ], "origin": { @@ -3067,6 +3160,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3108,6 +3202,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3149,6 +3244,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -3190,6 +3286,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -3231,6 +3328,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -3272,6 +3370,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -3313,6 +3412,7 @@ } ], "headings": [ + "Docling Technical Report", "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ], "origin": { @@ -3354,6 +3454,7 @@ } ], "headings": [ + "Docling Technical Report", "ABSTRACT" ], "origin": { @@ -3395,6 +3496,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3436,6 +3538,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3477,6 +3580,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3518,6 +3622,7 @@ } ], "headings": [ + "Docling Technical Report", "CCS CONCEPTS" ], "origin": { @@ -3559,6 +3664,7 @@ } ], "headings": [ + "Docling Technical Report", "KEYWORDS" ], "origin": { @@ -3600,6 +3706,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3641,6 +3748,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3682,6 +3790,7 @@ } ], "headings": [ + "Docling Technical Report", "KEYWORDS" ], "origin": { @@ -3723,6 +3832,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3764,6 +3874,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3805,6 +3916,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3846,6 +3958,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3887,6 +4000,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3928,6 +4042,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -3969,6 +4084,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -4010,6 +4126,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "captions": [ @@ -4054,6 +4171,7 @@ } ], "headings": [ + "Docling Technical Report", "ACM Reference Format:" ], "origin": { @@ -4095,6 +4213,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4136,6 +4255,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4177,6 +4297,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4218,6 +4339,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4259,6 +4381,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4300,6 +4423,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4341,6 +4465,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4382,6 +4507,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4423,6 +4549,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4464,6 +4591,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4505,6 +4633,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4546,6 +4675,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4587,6 +4717,7 @@ } ], "headings": [ + "Docling Technical Report", "5 EXPERIMENTS" ], "origin": { @@ -4628,6 +4759,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4669,6 +4801,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4710,6 +4843,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4751,6 +4885,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4792,6 +4927,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "captions": [ @@ -4836,6 +4972,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4877,6 +5014,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4918,6 +5056,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -4959,6 +5098,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -5000,6 +5140,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": { @@ -5041,6 +5182,7 @@ } ], "headings": [ + "Docling Technical Report", "Baselines for Object Detection" ], "origin": {