@@ -50,82 +50,114 @@ class ChunkType(str, Enum):
5050class SearchIndexChunkingV1PrependField (BaseModel ):
5151 """Field to prepend to chunk content"""
5252
53- dmo_name : str = Field (
54- default = "" , description = "Data Model Object name" , examples = ["udmo_1__dlm" ]
53+ dmo_name : Optional [ str ] = Field (
54+ default = None , description = "Data Model Object name" , examples = ["udmo_1__dlm" ]
5555 )
56- field_name : str = Field (
57- default = "" ,
56+ field_name : Optional [ str ] = Field (
57+ default = None ,
5858 description = "Field name to prepend" ,
5959 examples = ["ResolvedFilePath__c" ],
6060 )
61- value : str = Field (
62- default = "" ,
61+ value : Optional [ str ] = Field (
62+ default = None ,
6363 description = "Field value to prepend" ,
6464 examples = ["udlo_1__dll:quarterly_report.pdf" ],
6565 )
6666 model_config = ConfigDict (extra = "ignore" )
6767
6868
6969class SearchIndexChunkingV1TranscriptField (BaseModel ):
70- """Field to prepend to chunk content """
70+ """Transcript timing and speaker metadata for audio/video documents """
7171
72- speaker : str = Field (
73- default = "" ,
72+ speaker : Optional [ str ] = Field (
73+ default = None ,
7474 description = "Speaker name for audio/video transcripts" ,
7575 examples = ["Agent" ],
7676 )
77- start_timestamp : str = Field (
78- default = "" ,
77+ start_timestamp : Optional [ str ] = Field (
78+ default = None ,
7979 description = "Start timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff" ,
8080 examples = ["2026-03-25T02:01:24.918000" ],
8181 )
82- end_timestamp : str = Field (
83- default = "" ,
82+ end_timestamp : Optional [ str ] = Field (
83+ default = None ,
8484 description = "End timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff" ,
8585 examples = ["2026-03-25T02:01:30.500000" ],
8686 )
8787 model_config = ConfigDict (extra = "ignore" )
8888
8989
9090class SearchIndexChunkingV1Metadata (BaseModel ):
91- """Metadata for input documents"""
91+ """Metadata for input documents. """
9292
93- type : DocumentType = Field (
94- default = DocumentType .TEXT , description = "Document type (text)" , examples = ["text" ]
95- )
96- transcript_fields : SearchIndexChunkingV1TranscriptField = Field (
97- default_factory = SearchIndexChunkingV1TranscriptField ,
93+ type : Optional [DocumentType ] = Field (
94+ default = DocumentType .TEXT ,
9895 description = (
99- "Transcript information. Will only be there in case of audio-video files "
96+ "Document type of the chunk input. Currently only 'text' is supported. "
10097 ),
98+ examples = ["text" ],
10199 )
102- page_number : int = Field (
103- default = 0 ,
104- description = "Page number in the source document (0-based)" ,
100+ page_number : Optional [ int ] = Field (
101+ default = None ,
102+ description = ( "Page number in the source document (0-based). " ) ,
105103 examples = [1 ],
106104 )
105+ transcript_fields : Optional [SearchIndexChunkingV1TranscriptField ] = Field (
106+ default = None ,
107+ description = (
108+ "Speaker and timestamp metadata for audio/video transcripts. "
109+ "Optional — only present when the source document is a transcript."
110+ ),
111+ )
107112 text_as_html : Optional [str ] = Field (
108113 default = None ,
109- description = "HTML representation of the document text" ,
114+ description = ( "HTML representation of the chunk text, if available. " ) ,
110115 examples = ["<p>Online Remittance Instructions</p>" ],
111116 )
112- source_dmo_fields : Dict [str , Union [str , int ]] = Field (
113- default_factory = dict ,
117+ source_dmo_fields : Optional [ Dict [str , Union [str , int , float ] ]] = Field (
118+ default = None ,
114119 description = (
115- "Source Data Model Object fields as key-value pairs "
116- "(values can be string or int) "
120+ "Source Data Model Object fields as key-value pairs. "
121+ "Values can be string, int, or float. "
117122 ),
118123 examples = [
119124 {
120125 "FilePath__c" : "quarterly_report.pdf" ,
121- "Size__c" : 1377454 ,
126+ "Size__c" : 1377454.0 ,
122127 "ContentType__c" : "pdf" ,
123128 "LastModified__c" : "2026-03-25T02:01:24.918000" ,
124129 }
125130 ],
126131 )
127- prepend : List [SearchIndexChunkingV1PrependField ] = Field (
128- default_factory = list , description = "List of fields to prepend to each chunk"
132+ prepend : Optional [List [SearchIndexChunkingV1PrependField ]] = Field (
133+ default = None ,
134+ description = (
135+ "List of DMO fields whose values are prepended to the chunk "
136+ "text before indexing"
137+ ),
138+ )
139+ image_base64 : Optional [str ] = Field (
140+ default = None ,
141+ description = (
142+ "Base64-encoded image data associated with this chunk. "
143+ "Optional — only applicable for image-type document elements."
144+ ),
145+ )
146+ image_mime_type : Optional [str ] = Field (
147+ default = None ,
148+ description = (
149+ "MIME type of the associated image (e.g., 'image/png', 'image/jpeg'). "
150+ "Optional — should be provided alongside image_base64 when present."
151+ ),
152+ examples = ["image/png" , "image/jpeg" ],
153+ )
154+ image_type : Optional [str ] = Field (
155+ default = None ,
156+ description = (
157+ "Semantic category of the image content "
158+ "(e.g., 'diagram', 'screenshot', 'chart'). Optional."
159+ ),
160+ examples = ["diagram" , "screenshot" ],
129161 )
130162 model_config = ConfigDict (extra = "ignore" )
131163
@@ -143,9 +175,12 @@ class SearchIndexChunkingV1DocElement(BaseModel):
143175 )
144176 ],
145177 )
146- metadata : SearchIndexChunkingV1Metadata = Field (
147- default_factory = SearchIndexChunkingV1Metadata ,
148- description = "Source document metadata" ,
178+ metadata : Optional [SearchIndexChunkingV1Metadata ] = Field (
179+ default = None ,
180+ description = (
181+ "Source document metadata. Optional — may be absent if no "
182+ "metadata is available for the document element."
183+ ),
149184 )
150185 model_config = ConfigDict (extra = "ignore" )
151186
@@ -159,21 +194,25 @@ class SearchIndexChunkingV1Output(BaseModel):
159194 examples = ["Online Remittance Instructions" ],
160195 )
161196 seq_no : int = Field (
162- default = 0 , description = "Sequential chunk number (1-based)" , ge = 1 , examples = [1 ]
163- )
164- chunk_id : str = Field (
165- default = "" ,
166- description = "Unique identifier for this chunk (UUID format)" ,
167- examples = ["550e8400-e29b-41d4-a716-446655440000" ],
197+ default = 0 ,
198+ description = (
199+ "Sequential order of this chunk within the output. "
200+ "Represents chunk ordering within the source document (1-based)."
201+ ),
202+ ge = 1 ,
203+ examples = [1 ],
168204 )
169205 chunk_type : ChunkType = Field (
170206 default = ChunkType .TEXT ,
171- description = "Type of chunk (e.g., 'text') " ,
207+ description = "Type of chunk. Fixed value — always 'text'. " ,
172208 examples = ["text" ],
173209 )
174- citations : Dict [str , str ] = Field (
175- default_factory = dict ,
176- description = "Citation information as key-value pairs" ,
210+ citations : Optional [Dict [str , str ]] = Field (
211+ default = None ,
212+ description = (
213+ "Citation metadata associated with this chunk as key-value "
214+ "pairs. Optional — defaults to None if no citations are present."
215+ ),
177216 examples = [{"source" : "quarterly_report.pdf" }],
178217 )
179218 model_config = ConfigDict (extra = "ignore" )
@@ -194,4 +233,3 @@ class SearchIndexChunkingV1Response(BaseModel):
194233 output : List [SearchIndexChunkingV1Output ] = Field (
195234 default_factory = list , description = "Flat list of chunks from all docs"
196235 )
197- model_config = ConfigDict (extra = "ignore" )
0 commit comments