diff --git a/docling_core/experimental/idoctags.py b/docling_core/experimental/idoctags.py
index 9f497f14..7990f4cd 100644
--- a/docling_core/experimental/idoctags.py
+++ b/docling_core/experimental/idoctags.py
@@ -1,7 +1,7 @@
"""Define classes for DocTags serialization."""
from enum import Enum
-from typing import Any, Final, Optional
+from typing import Any, Final, Optional, Tuple
from xml.dom.minidom import parseString
from pydantic import BaseModel
@@ -9,6 +9,7 @@
from docling_core.transforms.serializer.base import (
BaseDocSerializer,
+ BaseListSerializer,
BaseMetaSerializer,
BasePictureSerializer,
BaseTableSerializer,
@@ -28,6 +29,8 @@
DescriptionMetaField,
DocItem,
DoclingDocument,
+ ListGroup,
+ ListItem,
MetaFieldName,
MoleculeMetaField,
NodeItem,
@@ -38,7 +41,10 @@
TabularChartMetaField,
)
from docling_core.types.doc.labels import DocItemLabel
-from docling_core.types.doc.tokens import DocumentToken
+from docling_core.types.doc.tokens import (
+ _CodeLanguageToken,
+ _PictureClassificationToken,
+)
DOCTAGS_VERSION: Final = "1.0.0"
@@ -61,6 +67,127 @@ class IDocTagsTableToken(str, Enum):
OTSL_RHED = "" # - row header cell,
OTSL_SROW = "" # - section row cell
+ @classmethod
+ def get_special_tokens(
+ cls,
+ ) -> list[str]:
+ """Return all table-related special tokens.
+
+ Includes the opening/closing OTSL tags and each enum token value.
+ """
+ special_tokens: list[str] = ["<otsl>", "</otsl>"]  # container tags come first
+ for token in cls:
+ special_tokens.append(f"{token.value}")
+
+ return special_tokens
+
+
+class IDocTagsToken(str, Enum):
+ """Tag names of the IDocTags vocabulary (stored without angle brackets)."""
+
+ _LOC_PREFIX = "loc_"  # prefix member: trailing "_" keeps it out of the plain-tag loop
+ _SECTION_HEADER_PREFIX = "section_header_level_"  # prefix member, completed with a level
+
+ DOCUMENT = "doctag"
+ VERSION = "version"
+
+ OTSL = "otsl"
+ ORDERED_LIST = "ordered_list"
+ UNORDERED_LIST = "unordered_list"
+
+ PAGE_BREAK = "page_break"
+
+ CAPTION = "caption"
+ FOOTNOTE = "footnote"
+ FORMULA = "formula"
+ LIST_ITEM = "list_item"
+ PAGE_FOOTER = "page_footer"
+ PAGE_HEADER = "page_header"
+ PICTURE = "picture"
+ SECTION_HEADER = "section_header"
+ TABLE = "table"
+ TEXT = "text"
+ TITLE = "title"
+ DOCUMENT_INDEX = "document_index"
+ CODE = "code"
+ CHECKBOX_SELECTED = "checkbox_selected"
+ CHECKBOX_UNSELECTED = "checkbox_unselected"
+ FORM = "form"
+ EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms
+
+ @classmethod
+ def get_special_tokens(
+ cls,
+ *,
+ page_dimension: Tuple[int, int] = (500, 500),  # larger side bounds the location tokens
+ include_location_tokens: bool = True,  # append <loc_0/> ... <loc_{max-1}/>
+ include_code_class: bool = False,  # append every _CodeLanguageToken value
+ include_picture_class: bool = False,  # append every _PictureClassificationToken value
+ ) -> list[str]:
+ """Return the special tokens: open/close tags, headers, table and optional extras."""
+ special_tokens: list[str] = []
+ for token in cls:
+ if not token.value.endswith("_"):
+ special_tokens.append(f"<{token.value}>")
+ special_tokens.append(f"</{token.value}>")
+
+ for i in range(6):
+ special_tokens += [
+ f"<{IDocTagsToken._SECTION_HEADER_PREFIX.value}{i}>",
+ f"</{IDocTagsToken._SECTION_HEADER_PREFIX.value}{i}>",
+ ]
+
+ special_tokens.extend(IDocTagsTableToken.get_special_tokens())
+
+ if include_picture_class:
+ special_tokens.extend([t.value for t in _PictureClassificationToken])
+
+ if include_code_class:
+ special_tokens.extend([t.value for t in _CodeLanguageToken])
+
+ if include_location_tokens:
+ # Adding dynamically generated location-tokens
+ for i in range(0, max(page_dimension[0], page_dimension[1])):
+ special_tokens.append(f"<{IDocTagsToken._LOC_PREFIX.value}{i}/>")
+
+ return special_tokens
+
+ @classmethod
+ def create_token_name_from_doc_item_label(cls, label: str, level: int = 1) -> str:
+ """Return the tag name for ``label``; ``level`` is only used for section headers."""
+ doc_token_by_item_label = {
+ DocItemLabel.CAPTION: IDocTagsToken.CAPTION,
+ DocItemLabel.FOOTNOTE: IDocTagsToken.FOOTNOTE,
+ DocItemLabel.FORMULA: IDocTagsToken.FORMULA,
+ DocItemLabel.LIST_ITEM: IDocTagsToken.LIST_ITEM,
+ DocItemLabel.PAGE_FOOTER: IDocTagsToken.PAGE_FOOTER,
+ DocItemLabel.PAGE_HEADER: IDocTagsToken.PAGE_HEADER,
+ DocItemLabel.PICTURE: IDocTagsToken.PICTURE,
+ DocItemLabel.TABLE: IDocTagsToken.TABLE,
+ DocItemLabel.TEXT: IDocTagsToken.TEXT,
+ DocItemLabel.TITLE: IDocTagsToken.TITLE,
+ DocItemLabel.DOCUMENT_INDEX: IDocTagsToken.DOCUMENT_INDEX,
+ DocItemLabel.CODE: IDocTagsToken.CODE,
+ DocItemLabel.CHECKBOX_SELECTED: IDocTagsToken.CHECKBOX_SELECTED,
+ DocItemLabel.CHECKBOX_UNSELECTED: IDocTagsToken.CHECKBOX_UNSELECTED,
+ DocItemLabel.FORM: IDocTagsToken.FORM,
+ # Fallback mappings for labels without dedicated tokens in IDocTagsToken
+ DocItemLabel.KEY_VALUE_REGION: IDocTagsToken.TEXT,
+ DocItemLabel.PARAGRAPH: IDocTagsToken.TEXT,
+ DocItemLabel.REFERENCE: IDocTagsToken.TEXT,
+ DocItemLabel.CHART: IDocTagsToken.PICTURE,
+ }
+
+ res: str
+ if label == DocItemLabel.SECTION_HEADER:
+ res = f"{IDocTagsToken._SECTION_HEADER_PREFIX.value}{level}"  # .value: formatting the member itself is Python-version dependent
+ else:
+ try:
+ res = doc_token_by_item_label[DocItemLabel(label)].value
+ except KeyError as e:
+ raise RuntimeError(f"Unexpected DocItemLabel: {label}") from e
+ return res
+
class IDocTagsParams(DocTagsParams):
"""DocTags-specific serialization parameters."""
@@ -69,6 +196,136 @@ class IDocTagsParams(DocTagsParams):
pretty_indentation: Optional[str] = 2 * " "
+class IDocTagsListSerializer(BaseModel, BaseListSerializer):
+ """DocTags-specific list serializer."""
+
+ indent: int = 4  # NOTE(review): not read anywhere in this class - confirm it is still needed
+
+ @override
+ def serialize(
+ self,
+ *,
+ item: ListGroup,
+ doc_serializer: "BaseDocSerializer",
+ doc: DoclingDocument,
+ list_level: int = 0,
+ is_inline_scope: bool = False,
+ visited: Optional[set[str]] = None, # refs of visited items
+ **kwargs: Any,
+ ) -> SerializationResult:
+ """Serialize a ``ListGroup`` into IDocTags markup.
+
+ This emits list containers (``<ordered_list>``/``<unordered_list>``) and
+ serializes children explicitly. Nested ``ListGroup`` items are emitted as
+ siblings without an enclosing ``<list_item>`` wrapper, while structural
+ wrappers are still preserved even when content is suppressed.
+
+ Args:
+ item: The list group to serialize.
+ doc_serializer: The document-level serializer to delegate nested items.
+ doc: The document that provides item resolution.
+ list_level: Current nesting depth (0-based).
+ is_inline_scope: Whether serialization happens in an inline context.
+ visited: Set of already visited item refs to avoid cycles.
+ **kwargs: Additional serializer parameters forwarded to ``IDocTagsParams``.
+
+ Returns:
+ A ``SerializationResult`` containing serialized text and metadata.
+ """
+ my_visited = visited if visited is not None else set()
+ params = IDocTagsParams(**kwargs)
+
+ # Build list children explicitly. Requirements:
+ # 1) <ordered_list>/<unordered_list> can be children of lists.
+ # 2) Do NOT wrap nested lists into <list_item>, even if they are
+ # children of a ListItem in the logical structure.
+ # 3) Still ensure structural wrappers are preserved even when
+ # content is suppressed (e.g., add_content=False).
+ item_results: list[SerializationResult] = []
+ child_results_wrapped: list[str] = []
+
+ excluded = doc_serializer.get_excluded_refs(**kwargs)
+ for child_ref in item.children:
+ child = child_ref.resolve(doc)
+
+ # If a nested list group is present directly under this list group,
+ # emit it as a sibling (no <list_item> wrapper).
+ if isinstance(child, ListGroup):
+ if child.self_ref in my_visited or child.self_ref in excluded:
+ continue
+ my_visited.add(child.self_ref)
+ sub_res = doc_serializer.serialize(
+ item=child,
+ list_level=list_level + 1,
+ is_inline_scope=is_inline_scope,
+ visited=my_visited,
+ **kwargs,
+ )
+ if sub_res.text:
+ child_results_wrapped.append(sub_res.text)
+ item_results.append(sub_res)
+ continue
+
+ # Normal case: ListItem under ListGroup
+ if not isinstance(child, ListItem):
+ continue
+ if child.self_ref in my_visited or child.self_ref in excluded:
+ continue
+
+ my_visited.add(child.self_ref)
+
+ # Serialize the list item content (DocTagsTextSerializer will not wrap it)
+ child_res = doc_serializer.serialize(
+ item=child,
+ list_level=list_level + 1,
+ is_inline_scope=is_inline_scope,
+ visited=my_visited,
+ **kwargs,
+ )
+ item_results.append(child_res)
+ # Wrap the content into <list_item>, without any nested list content.
+ child_text_wrapped = _wrap(
+ text=f"{child_res.text}",
+ wrap_tag=IDocTagsToken.LIST_ITEM.value,
+ )
+ child_results_wrapped.append(child_text_wrapped)
+
+ # After the <list_item>, append any nested lists (children of this ListItem)
+ # as siblings at the same level (not wrapped in <list_item>).
+ for subref in child.children:
+ sub = subref.resolve(doc)
+ if (
+ isinstance(sub, ListGroup)
+ and sub.self_ref not in my_visited
+ and sub.self_ref not in excluded
+ ):
+ my_visited.add(sub.self_ref)
+ sub_res = doc_serializer.serialize(
+ item=sub,
+ list_level=list_level + 1,
+ is_inline_scope=is_inline_scope,
+ visited=my_visited,
+ **kwargs,
+ )
+ if sub_res.text:
+ child_results_wrapped.append(sub_res.text)
+ item_results.append(sub_res)
+
+ delim = _get_delim(params=params)
+ if child_results_wrapped:
+ text_res = delim.join(child_results_wrapped)
+ text_res = f"{text_res}{delim}"
+ wrap_tag = (
+ IDocTagsToken.ORDERED_LIST.value
+ if item.first_item_is_enumerated(doc)
+ else IDocTagsToken.UNORDERED_LIST.value
+ )
+ text_res = _wrap(text=text_res, wrap_tag=wrap_tag)
+ else:
+ text_res = ""
+ return create_ser_result(text=text_res, span_source=item_results)
+
+
class IDocTagsMetaSerializer(BaseModel, BaseMetaSerializer):
"""DocTags-specific meta serializer."""
@@ -187,6 +444,8 @@ def serialize(
otsl_content = temp_table.export_to_otsl(
temp_doc,
add_cell_location=False,
+ # Suppress chart cell text if global content is off
+ add_cell_text=params.add_content,
self_closing=params.do_self_closing,
table_token=IDocTagsTableToken,
)
@@ -200,7 +459,7 @@ def serialize(
text_res = "".join([r.text for r in res_parts])
if text_res:
- token = DocumentToken.create_token_name_from_doc_item_label(
+ token = IDocTagsToken.create_token_name_from_doc_item_label(
label=DocItemLabel.CHART if is_chart else DocItemLabel.PICTURE,
)
text_res = _wrap(text=text_res, wrap_tag=token)
@@ -238,12 +497,16 @@ def serialize_doc(
text_res = delim.join([p.text for p in parts if p.text])
if self.params.add_page_break:
- page_sep = f"<{DocumentToken.PAGE_BREAK.value}{'/' if self.params.do_self_closing else ''}>"
+ page_sep = f"<{IDocTagsToken.PAGE_BREAK.value}{'/' if self.params.do_self_closing else ''}>"
for full_match, _, _ in self._get_page_breaks(text=text_res):
text_res = text_res.replace(full_match, page_sep)
- wrap_tag = DocumentToken.DOCUMENT.value
- text_res = f"<{wrap_tag}>{DOCTAGS_VERSION}{text_res}{delim}{wrap_tag}>"
+ tmp = f"<{IDocTagsToken.DOCUMENT.value}>"
+ tmp += f"<{IDocTagsToken.VERSION.value}>{DOCTAGS_VERSION}{IDocTagsToken.VERSION.value}>"
+ tmp += f"{text_res}"
+ tmp += f"{IDocTagsToken.DOCUMENT.value}>"
+
+ text_res = tmp
if self.params.pretty_indentation and (
my_root := parseString(text_res).documentElement
@@ -252,4 +515,5 @@ def serialize_doc(
text_res = "\n".join(
[line for line in text_res.split("\n") if line.strip()]
)
+
return create_ser_result(text=text_res, span_source=parts)
diff --git a/docling_core/transforms/serializer/common.py b/docling_core/transforms/serializer/common.py
index 4930e839..b494eb0e 100644
--- a/docling_core/transforms/serializer/common.py
+++ b/docling_core/transforms/serializer/common.py
@@ -470,6 +470,10 @@ def get_parts(
parts: list[SerializationResult] = []
my_visited: set[str] = visited if visited is not None else set()
params = self.params.merge_with_patch(patch=kwargs)
+ add_content = True
+
+ if hasattr(params, "add_content"):
+ add_content = getattr(params, "add_content")
for node, lvl in _iterate_items(
node=item,
@@ -489,7 +493,7 @@ def get_parts(
visited=my_visited,
**(dict(level=lvl) | kwargs),
)
- if part.text:
+ if len(part.text.strip()) > 0 or (not add_content):
parts.append(part)
return parts
diff --git a/docling_core/transforms/serializer/doctags.py b/docling_core/transforms/serializer/doctags.py
index 807b7750..beff6168 100644
--- a/docling_core/transforms/serializer/doctags.py
+++ b/docling_core/transforms/serializer/doctags.py
@@ -106,10 +106,16 @@ def serialize(
"""Serializes the passed item."""
my_visited = visited if visited is not None else set()
params = DocTagsParams(**kwargs)
- wrap_tag: Optional[str] = DocumentToken.create_token_name_from_doc_item_label(
- label=item.label,
- **({"level": item.level} if isinstance(item, SectionHeaderItem) else {}),
+ # Decide wrapping up-front so ListItem never gets wrapped here
+ wrap_tag_token: Optional[str] = (
+ DocumentToken.create_token_name_from_doc_item_label(
+ label=item.label,
+ **(
+ {"level": item.level} if isinstance(item, SectionHeaderItem) else {}
+ ),
+ )
)
+ wrap_tag: Optional[str] = None if isinstance(item, ListItem) else wrap_tag_token
parts: list[str] = []
if item.meta:
@@ -152,8 +158,6 @@ def serialize(
text_part = f"{language_token}{text_part}"
else:
text_part = text_part.strip()
- if isinstance(item, ListItem):
- wrap_tag = None # deferring list item tags to list handling
if text_part:
parts.append(text_part)
@@ -203,7 +207,8 @@ def serialize(
otsl_text = item.export_to_otsl(
doc=doc,
add_cell_location=params.add_table_cell_location,
- add_cell_text=params.add_table_cell_text,
+ # Suppress cell text when global content is disabled
+ add_cell_text=(params.add_table_cell_text and params.add_content),
xsize=params.xsize,
ysize=params.ysize,
visited=visited,
@@ -460,6 +465,7 @@ def serialize(
**kwargs,
)
delim = _get_delim(params=params)
+
if parts:
text_res = delim.join(
[
@@ -636,18 +642,19 @@ def serialize_captions(
results: list[SerializationResult] = []
if item.captions:
cap_res = super().serialize_captions(item, **kwargs)
- if cap_res.text:
- if params.add_location:
- for caption in item.captions:
- if caption.cref not in self.get_excluded_refs(**kwargs):
- if isinstance(cap := caption.resolve(self.doc), DocItem):
- loc_txt = cap.get_location_tokens(
- doc=self.doc,
- xsize=params.xsize,
- ysize=params.ysize,
- self_closing=params.do_self_closing,
- )
- results.append(create_ser_result(text=loc_txt))
+ if cap_res.text and params.add_location:
+ for caption in item.captions:
+ if caption.cref not in self.get_excluded_refs(**kwargs):
+ if isinstance(cap := caption.resolve(self.doc), DocItem):
+ loc_txt = cap.get_location_tokens(
+ doc=self.doc,
+ xsize=params.xsize,
+ ysize=params.ysize,
+ self_closing=params.do_self_closing,
+ )
+ results.append(create_ser_result(text=loc_txt))
+ # Only include caption textual content when add_content is True
+ if cap_res.text and params.add_content:
results.append(cap_res)
text_res = "".join([r.text for r in results])
if text_res:
diff --git a/test/data/doc/ddoc_0.json b/test/data/doc/ddoc_0.json
new file mode 100644
index 00000000..7894bed1
--- /dev/null
+++ b/test/data/doc/ddoc_0.json
@@ -0,0 +1,2103 @@
+{
+ "schema_name": "DoclingDocument",
+ "version": "1.8.0",
+ "name": "00073b00f3fbd33ef92f0c4902c5c7397c89f07f6a5528c5c97af53c67c4dcc7",
+ "furniture": {
+ "self_ref": "#/furniture",
+ "children": [],
+ "content_layer": "furniture",
+ "name": "_root_",
+ "label": "unspecified"
+ },
+ "body": {
+ "self_ref": "#/body",
+ "children": [
+ {
+ "$ref": "#/texts/0"
+ },
+ {
+ "$ref": "#/tables/0"
+ },
+ {
+ "$ref": "#/texts/1"
+ },
+ {
+ "$ref": "#/groups/0"
+ },
+ {
+ "$ref": "#/tables/1"
+ },
+ {
+ "$ref": "#/texts/3"
+ },
+ {
+ "$ref": "#/groups/1"
+ },
+ {
+ "$ref": "#/tables/2"
+ },
+ {
+ "$ref": "#/texts/5"
+ },
+ {
+ "$ref": "#/groups/2"
+ },
+ {
+ "$ref": "#/tables/3"
+ },
+ {
+ "$ref": "#/texts/7"
+ },
+ {
+ "$ref": "#/groups/3"
+ },
+ {
+ "$ref": "#/tables/4"
+ },
+ {
+ "$ref": "#/texts/9"
+ },
+ {
+ "$ref": "#/groups/4"
+ },
+ {
+ "$ref": "#/tables/5"
+ },
+ {
+ "$ref": "#/texts/11"
+ },
+ {
+ "$ref": "#/groups/5"
+ },
+ {
+ "$ref": "#/tables/6"
+ },
+ {
+ "$ref": "#/texts/13"
+ },
+ {
+ "$ref": "#/texts/14"
+ }
+ ],
+ "content_layer": "body",
+ "name": "_root_",
+ "label": "unspecified"
+ },
+ "groups": [
+ {
+ "self_ref": "#/groups/0",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [
+ {
+ "$ref": "#/texts/2"
+ }
+ ],
+ "content_layer": "body",
+ "name": "list_standalone_10",
+ "label": "list"
+ },
+ {
+ "self_ref": "#/groups/1",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [
+ {
+ "$ref": "#/texts/4"
+ }
+ ],
+ "content_layer": "body",
+ "name": "list_standalone_11",
+ "label": "list"
+ },
+ {
+ "self_ref": "#/groups/2",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [
+ {
+ "$ref": "#/texts/6"
+ }
+ ],
+ "content_layer": "body",
+ "name": "list_standalone_12",
+ "label": "list"
+ },
+ {
+ "self_ref": "#/groups/3",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [
+ {
+ "$ref": "#/texts/8"
+ }
+ ],
+ "content_layer": "body",
+ "name": "list_standalone_13",
+ "label": "list"
+ },
+ {
+ "self_ref": "#/groups/4",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [
+ {
+ "$ref": "#/texts/10"
+ }
+ ],
+ "content_layer": "body",
+ "name": "list_standalone_14",
+ "label": "list"
+ },
+ {
+ "self_ref": "#/groups/5",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [
+ {
+ "$ref": "#/texts/12"
+ }
+ ],
+ "content_layer": "body",
+ "name": "list_standalone_15",
+ "label": "list"
+ }
+ ],
+ "texts": [
+ {
+ "self_ref": "#/texts/0",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "furniture",
+ "label": "text",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 412.099992,
+ "t": 1510.53408,
+ "r": 847.769328,
+ "b": 1489.28472,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 47
+ ]
+ }
+ ],
+ "orig": "ndbinfo_select_all - Select From ndbinfo Tables",
+ "text": "ndbinfo_select_all - Select From ndbinfo Tables"
+ },
+ {
+ "self_ref": "#/texts/1",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "text",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 223.603992,
+ "t": 1315.97136,
+ "r": 1144.0813679999999,
+ "b": 1294.722,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 104
+ ]
+ }
+ ],
+ "orig": "This option sets the number of times to execute the select. Use --delay to set the time between loops.",
+ "text": "This option sets the number of times to execute the select. Use --delay to set the time between loops."
+ },
+ {
+ "self_ref": "#/texts/2",
+ "parent": {
+ "$ref": "#/groups/0"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "list_item",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 204.531624,
+ "t": 1271.087136,
+ "r": 451.80899999999997,
+ "b": 1249.8377759999998,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 21
+ ]
+ }
+ ],
+ "orig": "\u2022 --ndb-connectstring",
+ "text": "\u2022 --ndb-connectstring",
+ "enumerated": false,
+ "marker": ""
+ },
+ {
+ "self_ref": "#/texts/3",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "text",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 224.271072,
+ "t": 1084.613904,
+ "r": 1142.0813520000002,
+ "b": 1039.4017920000001,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 143
+ ]
+ }
+ ],
+ "orig": "Set connect string for connecting to ndb_mgmd. Syntax: \"[nodeid=id;][host=]hostname[:port]\". Overrides entries in NDB_CONNECTSTRING and my.cnf.",
+ "text": "Set connect string for connecting to ndb_mgmd. Syntax: \"[nodeid=id;][host=]hostname[:port]\". Overrides entries in NDB_CONNECTSTRING and my.cnf."
+ },
+ {
+ "self_ref": "#/texts/4",
+ "parent": {
+ "$ref": "#/groups/1"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "list_item",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 204.740928,
+ "t": 1014.705648,
+ "r": 403.115832,
+ "b": 996.925248,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 17
+ ]
+ }
+ ],
+ "orig": "\u2022 --ndb-mgmd-host",
+ "text": "\u2022 --ndb-mgmd-host",
+ "enumerated": false,
+ "marker": ""
+ },
+ {
+ "self_ref": "#/texts/5",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "text",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 223.91855999999999,
+ "t": 829.993824,
+ "r": 541.732608,
+ "b": 808.744464,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 30
+ ]
+ }
+ ],
+ "orig": "Same as --ndb-connectstring .",
+ "text": "Same as --ndb-connectstring ."
+ },
+ {
+ "self_ref": "#/texts/6",
+ "parent": {
+ "$ref": "#/groups/2"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "list_item",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 203.139936,
+ "t": 783.451152,
+ "r": 367.819344,
+ "b": 769.089024,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 14
+ ]
+ }
+ ],
+ "orig": "\u2022 --ndb-nodeid",
+ "text": "\u2022 --ndb-nodeid",
+ "enumerated": false,
+ "marker": ""
+ },
+ {
+ "self_ref": "#/texts/7",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "text",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 224.521992,
+ "t": 598.5096480000001,
+ "r": 849.322584,
+ "b": 577.260288,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 72
+ ]
+ }
+ ],
+ "orig": "Set node ID for this node, overriding any ID set by --ndb-connectstring.",
+ "text": "Set node ID for this node, overriding any ID set by --ndb-connectstring."
+ },
+ {
+ "self_ref": "#/texts/8",
+ "parent": {
+ "$ref": "#/groups/3"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "list_item",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 204.8058,
+ "t": 552.36456,
+ "r": 584.730504,
+ "b": 536.87304,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 32
+ ]
+ }
+ ],
+ "orig": "\u2022 --ndb-optimized-node-selection",
+ "text": "\u2022 --ndb-optimized-node-selection",
+ "enumerated": false,
+ "marker": ""
+ },
+ {
+ "self_ref": "#/texts/9",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "text",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 223.919784,
+ "t": 434.44684800000005,
+ "r": 1102.77504,
+ "b": 388.3603679999999,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 136
+ ]
+ }
+ ],
+ "orig": "Enable optimizations for selection of nodes for transactions. Enabled by default; use --skip-ndb- optimized-node-selection to disable.",
+ "text": "Enable optimizations for selection of nodes for transactions. Enabled by default; use --skip-ndb- optimized-node-selection to disable."
+ },
+ {
+ "self_ref": "#/texts/10",
+ "parent": {
+ "$ref": "#/groups/4"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "list_item",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 204.31619999999998,
+ "t": 363.80044799999996,
+ "r": 377.693352,
+ "b": 349.65532800000005,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 15
+ ]
+ }
+ ],
+ "orig": "\u2022 --no-defaults",
+ "text": "\u2022 --no-defaults",
+ "enumerated": false,
+ "marker": ""
+ },
+ {
+ "self_ref": "#/texts/11",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "text",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 225.182952,
+ "t": 245.847888,
+ "r": 818.1216,
+ "b": 224.598528,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 71
+ ]
+ }
+ ],
+ "orig": "Do not read default options from any option file other than login file.",
+ "text": "Do not read default options from any option file other than login file."
+ },
+ {
+ "self_ref": "#/texts/12",
+ "parent": {
+ "$ref": "#/groups/5"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "list_item",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 204.862104,
+ "t": 199.52063999999996,
+ "r": 414.566352,
+ "b": 180.91339199999993,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 18
+ ]
+ }
+ ],
+ "orig": "\u2022 --print-defaults",
+ "text": "\u2022 --print-defaults",
+ "enumerated": false,
+ "marker": ""
+ },
+ {
+ "self_ref": "#/texts/13",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "furniture",
+ "label": "text",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 224.58808800000003,
+ "t": 80.88537599999995,
+ "r": 545.5931039999999,
+ "b": 59.61700799999994,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 37
+ ]
+ }
+ ],
+ "orig": "Print program argument list and exit.",
+ "text": "Print program argument list and exit."
+ },
+ {
+ "self_ref": "#/texts/14",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "furniture",
+ "label": "text",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 1105.84116,
+ "t": 93.37996799999996,
+ "r": 1151.544096,
+ "b": 77.81716800000004,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 4
+ ]
+ }
+ ],
+ "orig": "4253",
+ "text": "4253"
+ }
+ ],
+ "pictures": [],
+ "tables": [
+ {
+ "self_ref": "#/tables/0",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "table",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 222.250248,
+ "t": 1438.47,
+ "r": 1151.596728,
+ "b": 1340.410896,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 53
+ ]
+ }
+ ],
+ "captions": [],
+ "references": [],
+ "footnotes": [],
+ "data": {
+ "table_cells": [
+ {
+ "bbox": {
+ "l": 224.533008,
+ "t": 146.833632,
+ "r": 688.3127280000001,
+ "b": 176.839344,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Default Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.2906959999999,
+ "t": 146.833632,
+ "r": 1149.957792,
+ "b": 176.839344,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "1",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 224.51464800000002,
+ "t": 176.642928,
+ "r": 688.3127280000001,
+ "b": 209.54419199999998,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Minimum Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.2906959999999,
+ "t": 176.642928,
+ "r": 1149.957792,
+ "b": 209.54419199999998,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "0",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 224.553816,
+ "t": 209.526768,
+ "r": 688.3127280000001,
+ "b": 241.270128,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Maximum Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.2906959999999,
+ "t": 209.526768,
+ "r": 1149.957792,
+ "b": 242.42803199999997,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "MAX_INT",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ "num_rows": 3,
+ "num_cols": 2,
+ "grid": [
+ [
+ {
+ "bbox": {
+ "l": 224.533008,
+ "t": 146.833632,
+ "r": 688.3127280000001,
+ "b": 176.839344,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Default Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.2906959999999,
+ "t": 146.833632,
+ "r": 1149.957792,
+ "b": 176.839344,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "1",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ [
+ {
+ "bbox": {
+ "l": 224.51464800000002,
+ "t": 176.642928,
+ "r": 688.3127280000001,
+ "b": 209.54419199999998,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Minimum Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.2906959999999,
+ "t": 176.642928,
+ "r": 1149.957792,
+ "b": 209.54419199999998,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "0",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ [
+ {
+ "bbox": {
+ "l": 224.553816,
+ "t": 209.526768,
+ "r": 688.3127280000001,
+ "b": 241.270128,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Maximum Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.2906959999999,
+ "t": 209.526768,
+ "r": 1149.957792,
+ "b": 242.42803199999997,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "MAX_INT",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ]
+ ]
+ },
+ "annotations": []
+ },
+ {
+ "self_ref": "#/tables/1",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "table",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 222.977304,
+ "t": 1210.340736,
+ "r": 1153.22832,
+ "b": 1109.547648,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 90
+ ]
+ }
+ ],
+ "captions": [],
+ "references": [],
+ "footnotes": [],
+ "data": {
+ "table_cells": [
+ {
+ "bbox": {
+ "l": 224.89898399999998,
+ "t": 375.309792,
+ "r": 686.81088,
+ "b": 408.21105600000004,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8488239999999,
+ "t": 375.309792,
+ "r": 1150.3458,
+ "b": 408.21105600000004,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--ndb-connectstring=connection-string",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 224.672544,
+ "t": 407.9196,
+ "r": 686.81088,
+ "b": 440.820864,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Type",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8488239999999,
+ "t": 407.9196,
+ "r": 1150.1193600000001,
+ "b": 440.820864,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "String",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 224.63092799999998,
+ "t": 440.996688,
+ "r": 686.81088,
+ "b": 472.518288,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Default Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8488239999999,
+ "t": 440.996688,
+ "r": 1150.0777440000002,
+ "b": 472.518288,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "[none]",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ "num_rows": 3,
+ "num_cols": 2,
+ "grid": [
+ [
+ {
+ "bbox": {
+ "l": 224.89898399999998,
+ "t": 375.309792,
+ "r": 686.81088,
+ "b": 408.21105600000004,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8488239999999,
+ "t": 375.309792,
+ "r": 1150.3458,
+ "b": 408.21105600000004,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--ndb-connectstring=connection-string",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ [
+ {
+ "bbox": {
+ "l": 224.672544,
+ "t": 407.9196,
+ "r": 686.81088,
+ "b": 440.820864,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Type",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8488239999999,
+ "t": 407.9196,
+ "r": 1150.1193600000001,
+ "b": 440.820864,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "String",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ [
+ {
+ "bbox": {
+ "l": 224.63092799999998,
+ "t": 440.996688,
+ "r": 686.81088,
+ "b": 472.518288,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Default Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8488239999999,
+ "t": 440.996688,
+ "r": 1150.0777440000002,
+ "b": 472.518288,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "[none]",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ]
+ ]
+ },
+ "annotations": []
+ },
+ {
+ "self_ref": "#/tables/2",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "table",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 223.20129599999999,
+ "t": 955.31832,
+ "r": 1153.452312,
+ "b": 854.525232,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 86
+ ]
+ }
+ ],
+ "captions": [],
+ "references": [],
+ "footnotes": [],
+ "data": {
+ "table_cells": [
+ {
+ "bbox": {
+ "l": 225.70927200000003,
+ "t": 630.3464640000001,
+ "r": 688.6591199999999,
+ "b": 661.868064,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.993272,
+ "t": 630.3464640000001,
+ "r": 1151.156088,
+ "b": 661.868064,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--ndb-mgmd-host=connection-string",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 224.87083199999998,
+ "t": 661.646304,
+ "r": 688.6591199999999,
+ "b": 694.872288,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Type",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.993272,
+ "t": 661.646304,
+ "r": 1150.771752,
+ "b": 694.872288,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "String",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 225.110736,
+ "t": 695.24928,
+ "r": 688.6591199999999,
+ "b": 726.77088,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Default Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.993272,
+ "t": 695.24928,
+ "r": 1150.557552,
+ "b": 726.77088,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "[none]",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ "num_rows": 3,
+ "num_cols": 2,
+ "grid": [
+ [
+ {
+ "bbox": {
+ "l": 225.70927200000003,
+ "t": 630.3464640000001,
+ "r": 688.6591199999999,
+ "b": 661.868064,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.993272,
+ "t": 630.3464640000001,
+ "r": 1151.156088,
+ "b": 661.868064,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--ndb-mgmd-host=connection-string",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ [
+ {
+ "bbox": {
+ "l": 224.87083199999998,
+ "t": 661.646304,
+ "r": 688.6591199999999,
+ "b": 694.872288,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Type",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.993272,
+ "t": 661.646304,
+ "r": 1150.771752,
+ "b": 694.872288,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "String",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ [
+ {
+ "bbox": {
+ "l": 225.110736,
+ "t": 695.24928,
+ "r": 688.6591199999999,
+ "b": 726.77088,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Default Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 688.993272,
+ "t": 695.24928,
+ "r": 1150.557552,
+ "b": 726.77088,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "[none]",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ]
+ ]
+ },
+ "annotations": []
+ },
+ {
+ "self_ref": "#/tables/3",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "table",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 222.433848,
+ "t": 724.960368,
+ "r": 1152.684864,
+ "b": 624.16728,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 68
+ ]
+ }
+ ],
+ "captions": [],
+ "references": [],
+ "footnotes": [],
+ "data": {
+ "table_cells": [
+ {
+ "bbox": {
+ "l": 224.98344,
+ "t": 860.6774879999999,
+ "r": 686.840256,
+ "b": 893.903472,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8684079999999,
+ "t": 860.6774879999999,
+ "r": 1150.917408,
+ "b": 893.903472,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--ndb-nodeid=#",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 224.908776,
+ "t": 893.849616,
+ "r": 686.840256,
+ "b": 927.0756,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Type",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8684079999999,
+ "t": 893.849616,
+ "r": 1150.842744,
+ "b": 927.0756,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "Integer",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 225.055656,
+ "t": 927.378144,
+ "r": 686.840256,
+ "b": 957.981024,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Default Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8684079999999,
+ "t": 927.378144,
+ "r": 1150.960248,
+ "b": 958.152096,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "[none]",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ "num_rows": 3,
+ "num_cols": 2,
+ "grid": [
+ [
+ {
+ "bbox": {
+ "l": 224.98344,
+ "t": 860.6774879999999,
+ "r": 686.840256,
+ "b": 893.903472,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8684079999999,
+ "t": 860.6774879999999,
+ "r": 1150.917408,
+ "b": 893.903472,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--ndb-nodeid=#",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ [
+ {
+ "bbox": {
+ "l": 224.908776,
+ "t": 893.849616,
+ "r": 686.840256,
+ "b": 927.0756,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Type",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8684079999999,
+ "t": 893.849616,
+ "r": 1150.842744,
+ "b": 927.0756,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 1,
+ "end_row_offset_idx": 2,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "Integer",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ [
+ {
+ "bbox": {
+ "l": 225.055656,
+ "t": 927.378144,
+ "r": 686.840256,
+ "b": 957.981024,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Default Value",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.8684079999999,
+ "t": 927.378144,
+ "r": 1150.960248,
+ "b": 958.152096,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 2,
+ "end_row_offset_idx": 3,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "[none]",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ]
+ ]
+ },
+ "annotations": []
+ },
+ {
+ "self_ref": "#/tables/4",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "table",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 223.6248,
+ "t": 492.69369600000005,
+ "r": 1154.5906320000001,
+ "b": 459.70056,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 50
+ ]
+ }
+ ],
+ "captions": [],
+ "references": [],
+ "footnotes": [],
+ "data": {
+ "table_cells": [
+ {
+ "bbox": {
+ "l": 225.216,
+ "t": 1092.2915520000001,
+ "r": 686.2784399999999,
+ "b": 1123.383888,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.6272799999999,
+ "t": 1093.1120640000001,
+ "r": 1150.917408,
+ "b": 1122.805728,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--ndb-optimized-node-selection",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ "num_rows": 1,
+ "num_cols": 2,
+ "grid": [
+ [
+ {
+ "bbox": {
+ "l": 225.216,
+ "t": 1092.2915520000001,
+ "r": 686.2784399999999,
+ "b": 1123.383888,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 686.6272799999999,
+ "t": 1093.1120640000001,
+ "r": 1150.917408,
+ "b": 1122.805728,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--ndb-optimized-node-selection",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ]
+ ]
+ },
+ "annotations": []
+ },
+ {
+ "self_ref": "#/tables/5",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "table",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 222.891624,
+ "t": 304.4162879999999,
+ "r": 1153.14264,
+ "b": 269.734608,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 33
+ ]
+ }
+ ],
+ "captions": [],
+ "references": [],
+ "footnotes": [],
+ "data": {
+ "table_cells": [
+ {
+ "bbox": {
+ "l": 224.082576,
+ "t": 1281.102768,
+ "r": 687.7031760000001,
+ "b": 1314.122832,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 687.775392,
+ "t": 1280.29968,
+ "r": 1150.0165439999998,
+ "b": 1314.122832,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--no-defaults",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ "num_rows": 1,
+ "num_cols": 2,
+ "grid": [
+ [
+ {
+ "bbox": {
+ "l": 224.082576,
+ "t": 1281.102768,
+ "r": 687.7031760000001,
+ "b": 1314.122832,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 687.775392,
+ "t": 1280.29968,
+ "r": 1150.0165439999998,
+ "b": 1314.122832,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--no-defaults",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ]
+ ]
+ },
+ "annotations": []
+ },
+ {
+ "self_ref": "#/tables/6",
+ "parent": {
+ "$ref": "#/body"
+ },
+ "children": [],
+ "content_layer": "body",
+ "label": "table",
+ "prov": [
+ {
+ "page_no": 1,
+ "bbox": {
+ "l": 224.27352000000002,
+ "t": 138.81384000000003,
+ "r": 1153.274832,
+ "b": 105.69873600000005,
+ "coord_origin": "BOTTOMLEFT"
+ },
+ "charspan": [
+ 0,
+ 36
+ ]
+ }
+ ],
+ "captions": [],
+ "references": [],
+ "footnotes": [],
+ "data": {
+ "table_cells": [
+ {
+ "bbox": {
+ "l": 224.298,
+ "t": 1445.525136,
+ "r": 689.071608,
+ "b": 1478.317104,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 689.0924160000001,
+ "t": 1445.525136,
+ "r": 1152.3348,
+ "b": 1478.248992,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--print-defaults",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ],
+ "num_rows": 1,
+ "num_cols": 2,
+ "grid": [
+ [
+ {
+ "bbox": {
+ "l": 224.298,
+ "t": 1445.525136,
+ "r": 689.071608,
+ "b": 1478.317104,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 0,
+ "end_col_offset_idx": 1,
+ "text": "Command-Line Format",
+ "column_header": false,
+ "row_header": true,
+ "row_section": false,
+ "fillable": false
+ },
+ {
+ "bbox": {
+ "l": 689.0924160000001,
+ "t": 1445.525136,
+ "r": 1152.3348,
+ "b": 1478.248992,
+ "coord_origin": "TOPLEFT"
+ },
+ "row_span": 1,
+ "col_span": 1,
+ "start_row_offset_idx": 0,
+ "end_row_offset_idx": 1,
+ "start_col_offset_idx": 1,
+ "end_col_offset_idx": 2,
+ "text": "--print-defaults",
+ "column_header": false,
+ "row_header": false,
+ "row_section": false,
+ "fillable": false
+ }
+ ]
+ ]
+ },
+ "annotations": []
+ }
+ ],
+ "key_value_items": [],
+ "form_items": [],
+ "pages": {
+ "1": {
+ "size": {
+ "width": 1224.0,
+ "height": 1584.0
+ },
+ "image": {
+ "mimetype": "image/png",
+ "dpi": 72,
+ "size": {
+ "width": 1224.0,
+ "height": 1584.0
+ },
+ "uri": "GroundTruthPageImages/0"
+ },
+ "page_no": 1
+ }
+ }
+}
\ No newline at end of file
diff --git a/test/data/doc/ddoc_0.v0.gt.dt b/test/data/doc/ddoc_0.v0.gt.dt
new file mode 100644
index 00000000..7408867f
--- /dev/null
+++ b/test/data/doc/ddoc_0.v0.gt.dt
@@ -0,0 +1,237 @@
+
+ 1.0.0
+
+
+
+
+
+ ndbinfo_select_all - Select From ndbinfo Tables
+
+
+
+
+
+
+
+ Default Value
+
+ 1
+
+
+ Minimum Value
+
+ 0
+
+
+ Maximum Value
+
+ MAX_INT
+
+
+
+
+
+
+
+ This option sets the number of times to execute the select. Use --delay to set the time between loops.
+
+
+
+
+
+
+
+ • --ndb-connectstring
+
+
+
+
+
+
+
+
+ Command-Line Format
+
+ --ndb-connectstring=connection-string
+
+
+ Type
+
+ String
+
+
+ Default Value
+
+ [none]
+
+
+
+
+
+
+
+ Set connect string for connecting to ndb_mgmd. Syntax: "[nodeid=id;][host=]hostname[:port]". Overrides entries in NDB_CONNECTSTRING and my.cnf.
+
+
+
+
+
+
+
+ • --ndb-mgmd-host
+
+
+
+
+
+
+
+
+ Command-Line Format
+
+ --ndb-mgmd-host=connection-string
+
+
+ Type
+
+ String
+
+
+ Default Value
+
+ [none]
+
+
+
+
+
+
+
+ Same as --ndb-connectstring .
+
+
+
+
+
+
+
+ • --ndb-nodeid
+
+
+
+
+
+
+
+
+ Command-Line Format
+
+ --ndb-nodeid=#
+
+
+ Type
+
+ Integer
+
+
+ Default Value
+
+ [none]
+
+
+
+
+
+
+
+ Set node ID for this node, overriding any ID set by --ndb-connectstring.
+
+
+
+
+
+
+
+ • --ndb-optimized-node-selection
+
+
+
+
+
+
+
+
+ Command-Line Format
+
+ --ndb-optimized-node-selection
+
+
+
+
+
+
+
+ Enable optimizations for selection of nodes for transactions. Enabled by default; use --skip-ndb- optimized-node-selection to disable.
+
+
+
+
+
+
+
+ • --no-defaults
+
+
+
+
+
+
+
+
+ Command-Line Format
+
+ --no-defaults
+
+
+
+
+
+
+
+ Do not read default options from any option file other than login file.
+
+
+
+
+
+
+
+ • --print-defaults
+
+
+
+
+
+
+
+
+ Command-Line Format
+
+ --print-defaults
+
+
+
+
+
+
+
+ Print program argument list and exit.
+
+
+
+
+
+
+ 4253
+
+
diff --git a/test/data/doc/ddoc_0.v1.gt.dt b/test/data/doc/ddoc_0.v1.gt.dt
new file mode 100644
index 00000000..90808c23
--- /dev/null
+++ b/test/data/doc/ddoc_0.v1.gt.dt
@@ -0,0 +1,192 @@
+
+ 1.0.0
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/data/doc/ddoc_0.v2.gt.dt b/test/data/doc/ddoc_0.v2.gt.dt
new file mode 100644
index 00000000..bba8d2b9
--- /dev/null
+++ b/test/data/doc/ddoc_0.v2.gt.dt
@@ -0,0 +1 @@
+1.0.0
diff --git a/test/test_doc_schema.py b/test/test_doc_schema.py
index b03a12e5..9776e791 100644
--- a/test/test_doc_schema.py
+++ b/test/test_doc_schema.py
@@ -43,7 +43,7 @@ def test_ccs_document():
assert False, f"Data in file {filename} should be invalid for CCSDocument model"
except ValidationError as e:
for error in e.errors():
- print(type(error))
+ # print(type(error))
assert all(
item in error["loc"] for item in ("description", "logs")
), f"Data in file {filename} should fail in logs"
diff --git a/test/test_docling_doc.py b/test/test_docling_doc.py
index f10c978f..5a043f57 100644
--- a/test/test_docling_doc.py
+++ b/test/test_docling_doc.py
@@ -724,7 +724,7 @@ def _test_export_methods(
second_page = first_page + 1
if second_page in doc.pages: # Only test if document has at least 2 pages
dt_pages_pred = doc.export_to_doctags(pages={first_page, second_page})
- print(dt_pages_pred)
+ # print(dt_pages_pred)
_verify_regression_test(dt_pages_pred, filename=filename, ext="pages.dt")
# Test Tables export ...
diff --git a/test/test_doctags_content_suppression.py b/test/test_doctags_content_suppression.py
new file mode 100644
index 00000000..1a6cf4b2
--- /dev/null
+++ b/test/test_doctags_content_suppression.py
@@ -0,0 +1,66 @@
+from docling_core.transforms.serializer.doctags import (
+ DocTagsDocSerializer,
+ DocTagsParams,
+)
+from docling_core.types.doc.document import DoclingDocument, TableData
+from docling_core.types.doc.labels import DocItemLabel
+
+
+def serialize_doctags(doc: DoclingDocument, **param_overrides) -> str:
+    """Serialize *doc* to DocTags text with DocTagsParams built from the overrides."""
+    serializer = DocTagsDocSerializer(doc=doc, params=DocTagsParams(**param_overrides))
+    return serializer.serialize().text
+
+
+def test_no_content_suppresses_caption_and_table_cell_text():
+    """With add_content=False, caption and cell text vanish but OTSL tags stay."""
+    doc = DoclingDocument(name="t")
+    cap = doc.add_text(label=DocItemLabel.CAPTION, text="Table Caption Text")
+
+    # Build a 2x2 table with header row and data row
+    td = TableData(num_rows=0, num_cols=2)
+    td.add_row(["H1", "H2"])  # header
+    td.add_row(["C1", "C2"])  # data
+    doc.add_table(data=td, caption=cap)
+
+    txt = serialize_doctags(doc, add_content=False)
+
+    # Caption text suppressed
+    assert "Table Caption Text" not in txt
+
+    # No table cell text
+    for cell_text in ["H1", "H2", "C1", "C2"]:
+        assert cell_text not in txt
+
+    # OTSL structural tokens should remain: check the explicit open/close tags
+    assert "<otsl>" in txt
+    assert "</otsl>" in txt
+
+
+def test_no_content_suppresses_figure_caption_text():
+    """A picture caption's text is absent from DocTags when add_content=False."""
+    caption_text = "Figure Caption Text"
+    doc = DoclingDocument(name="t")
+    caption = doc.add_text(label=DocItemLabel.CAPTION, text=caption_text)
+    doc.add_picture(caption=caption)
+    assert caption_text not in serialize_doctags(doc, add_content=False)
+
+
+def test_list_items_not_double_wrapped_when_no_content():
+    """List items keep a single <list_item> wrapper when content is suppressed."""
+    doc = DoclingDocument(name="t")
+    lst = doc.add_list_group()
+    doc.add_list_item("Item A", parent=lst)
+    doc.add_list_item("Item B", parent=lst)
+
+    # Smoke check only: serializing with content must not raise.
+    serialize_doctags(doc, add_content=True)
+
+    txt = serialize_doctags(doc, add_content=False)
+
+    # No nested <list_item><list_item>
+    assert "<list_item><list_item>" not in txt
+
+    # Should still have at least two opening list_item wrappers (for the two items)
+    # Note: other occurrences could appear in location tokens etc., so be conservative
+    assert txt.count("<list_item>") >= 2
diff --git a/test/test_json_schema_to_search_mapper.py b/test/test_json_schema_to_search_mapper.py
index b2d15786..9a6acbe4 100644
--- a/test/test_json_schema_to_search_mapper.py
+++ b/test/test_json_schema_to_search_mapper.py
@@ -56,7 +56,7 @@ def test_json_schema_to_search_mapper_0():
def test_json_schema_to_search_mapper_1():
"""Test the class JsonSchemaToSearchMapper."""
s = Record.model_json_schema()
- print(json.dumps(s, indent=2))
+ # print(json.dumps(s, indent=2))
_meta = {
"aliases": [".production", "ccc"],
diff --git a/test/test_otsl_table_export.py b/test/test_otsl_table_export.py
index 4b3534f3..84dd5005 100644
--- a/test/test_otsl_table_export.py
+++ b/test/test_otsl_table_export.py
@@ -274,10 +274,14 @@ def test_table_export_to_otsl():
otsl_string = doc.tables[0].export_to_otsl(
add_cell_location=False, add_cell_text=False, doc=doc
)
- print_friendly = otsl_string.split("")
- print("OTSL out:")
+ otsl_string.split("")
+ # print("OTSL out:")
+
+ """
for s in print_friendly:
print(s)
+ """
+
assert (
otsl_string
== ""
diff --git a/test/test_serialization.py b/test/test_serialization.py
index 3f17492e..a783c410 100644
--- a/test/test_serialization.py
+++ b/test/test_serialization.py
@@ -4,9 +4,12 @@
import pytest
-from docling_core.experimental.idoctags import IDocTagsDocSerializer
+from docling_core.experimental.idoctags import IDocTagsDocSerializer, IDocTagsParams
from docling_core.transforms.serializer.common import _DEFAULT_LABELS
-from docling_core.transforms.serializer.doctags import DocTagsDocSerializer
+from docling_core.transforms.serializer.doctags import (
+ DocTagsDocSerializer,
+ DocTagsParams,
+)
from docling_core.transforms.serializer.html import (
HTMLDocSerializer,
HTMLOutputStyle,
@@ -38,6 +41,16 @@ def verify(exp_file: Path, actual: str):
with open(exp_file, "r", encoding="utf-8") as f:
expected = f.read().rstrip()
+ # Normalize platform-dependent quote escaping for DocTags outputs
+ name = exp_file.name
+ if name.endswith(".dt") or name.endswith(".idt.xml"):
+
+ def _normalize_quotes(s: str) -> str:
+ return s.replace(""", '"').replace(""", '"')
+
+ expected = _normalize_quotes(expected)
+ actual = _normalize_quotes(actual)
+
assert expected == actual
@@ -593,6 +606,43 @@ def test_doctags_meta():
# ===============================
+def test_idoctags():
+    """Check IDocTags serialization of ddoc_0 against its ground-truth files.
+
+    Three parameter variants are compared with the .v0/.v1/.v2 ".gt.dt" files.
+    """
+    src = Path("./test/data/doc/ddoc_0.json")
+    doc = DoclingDocument.load_from_json(src)
+
+    # Human readable, indented and with content
+    params = IDocTagsParams()
+    params.add_content = True
+
+    ser = IDocTagsDocSerializer(doc=doc, params=params)
+    actual = ser.serialize().text
+
+    verify(exp_file=src.with_suffix(".v0.gt.dt"), actual=actual)
+
+    # Human readable, indented but without content
+    params = IDocTagsParams()
+    params.add_content = False
+
+    ser = IDocTagsDocSerializer(doc=doc, params=params)
+    actual = ser.serialize().text
+
+    verify(exp_file=src.with_suffix(".v1.gt.dt"), actual=actual)
+
+    # Machine readable, not indented and without content
+    params = IDocTagsParams()
+    params.pretty_indentation = ""
+    params.add_content = False
+    params.mode = DocTagsParams.Mode.MINIFIED
+
+    ser = IDocTagsDocSerializer(doc=doc, params=params)
+    actual = ser.serialize().text
+
+    verify(exp_file=src.with_suffix(".v2.gt.dt"), actual=actual)
+
def test_idoctags_meta():
src = Path("./test/data/doc/dummy_doc_with_meta.yaml")
doc = DoclingDocument.load_from_yaml(src)