33from an LLM's text stream.
44"""
55
6+ import logging
67import re
78from enum import Enum , auto
89from dataclasses import dataclass , field
910from typing import List , Dict , Any
1011
12+ log = logging .getLogger (__name__ )
13+
1114# --- Constants ---
1215# These are duplicated from callbacks for now to keep the parser self-contained.
1316# They should eventually live in a shared constants module.
1417ARTIFACT_BLOCK_DELIMITER_OPEN = "«««"
1518ARTIFACT_BLOCK_DELIMITER_CLOSE = "»»»"
16- # The full sequence that must be matched to start a block.
17- BLOCK_START_SEQUENCE = f"{ ARTIFACT_BLOCK_DELIMITER_OPEN } save_artifact:"
19+ # The full sequences that must be matched to start a block.
20+ SAVE_ARTIFACT_START_SEQUENCE = f"{ ARTIFACT_BLOCK_DELIMITER_OPEN } save_artifact:"
21+ TEMPLATE_START_SEQUENCE = f"{ ARTIFACT_BLOCK_DELIMITER_OPEN } template:"
22+ # For backward compatibility
23+ BLOCK_START_SEQUENCE = SAVE_ARTIFACT_START_SEQUENCE
1824# Regex to parse parameters from a confirmed start line.
1925PARAMS_REGEX = re .compile (r'(\w+)\s*=\s*"(.*?)"' )
2026
@@ -66,6 +72,21 @@ class BlockInvalidatedEvent(ParserEvent):
6672 rolled_back_text : str
6773
6874
75+ @dataclass
76+ class TemplateBlockStartedEvent (ParserEvent ):
77+ """Emitted when a template block's start is confirmed."""
78+
79+ params : Dict [str , Any ]
80+
81+
82+ @dataclass
83+ class TemplateBlockCompletedEvent (ParserEvent ):
84+ """Emitted when a template block is successfully closed."""
85+
86+ params : Dict [str , Any ]
87+ template_content : str
88+
89+
6990@dataclass
7091class ParserResult :
7192 """The result of processing a single text chunk."""
@@ -93,6 +114,10 @@ def __init__(self, progress_update_interval_bytes: int = 4096):
93114 self ._block_params : Dict [str , Any ] = {}
94115 self ._progress_update_interval = progress_update_interval_bytes
95116 self ._last_progress_update_size = 0
117+ # Track block type and nesting for template handling
118+ self ._current_block_type : str = None # "save_artifact" or "template"
119+ self ._nesting_depth = 0 # Track if we're inside a block
120+ self ._previous_state : ParserState = None # Track state before POTENTIAL_BLOCK
96121
97122 def _reset_state (self ):
98123 """Resets the parser to its initial IDLE state."""
@@ -101,6 +126,9 @@ def _reset_state(self):
101126 self ._artifact_buffer = ""
102127 self ._block_params = {}
103128 self ._last_progress_update_size = 0
129+ self ._current_block_type = None
130+ self ._nesting_depth = 0
131+ self ._previous_state = None
104132
105133 def process_chunk (self , text_chunk : str ) -> ParserResult :
106134 """
@@ -142,21 +170,36 @@ def finalize(self) -> ParserResult:
142170 events .append (BlockInvalidatedEvent (rolled_back_text = rolled_back_text ))
143171 elif self ._state == ParserState .IN_BLOCK :
144172 # The turn ended while inside a block. This is an error/failure.
145- # The orchestrator (callback) will see this and know to fail the artifact.
146- # We emit a BlockCompletedEvent so the orchestrator knows what was buffered.
173+ # The orchestrator (callback) will see this and know to fail the artifact/template .
174+ # We emit the appropriate completion event so the orchestrator knows what was buffered.
147175 # The orchestrator is responsible for interpreting this as a failure.
148- events .append (
149- BlockCompletedEvent (
150- params = self ._block_params , content = self ._artifact_buffer
151- )
176+ log .warning (
177+ "[StreamParser] finalize() found unterminated block! Type: %s, buffer length: %d, nesting_depth: %d" ,
178+ self ._current_block_type ,
179+ len (self ._artifact_buffer ),
180+ self ._nesting_depth ,
152181 )
182+ if self ._current_block_type == "template" :
183+ events .append (
184+ TemplateBlockCompletedEvent (
185+ params = self ._block_params ,
186+ template_content = self ._artifact_buffer ,
187+ )
188+ )
189+ else :
190+ events .append (
191+ BlockCompletedEvent (
192+ params = self ._block_params , content = self ._artifact_buffer
193+ )
194+ )
153195
154196 self ._reset_state ()
155197 return ParserResult ("" .join (user_text_parts ), events )
156198
157199 def _process_idle (self , char : str , user_text_parts : List [str ]):
158200 """State handler for when the parser is outside any block."""
159201 if char == BLOCK_START_SEQUENCE [0 ]:
202+ self ._previous_state = ParserState .IDLE
160203 self ._state = ParserState .POTENTIAL_BLOCK
161204 self ._speculative_buffer += char
162205 else :
@@ -168,59 +211,145 @@ def _process_potential(
168211 """State handler for when a block might be starting."""
169212 self ._speculative_buffer += char
170213
171- # If we have the full start sequence, we are now looking for the newline.
172- if self ._speculative_buffer .startswith (BLOCK_START_SEQUENCE ):
214+ # Check if we match save_artifact or template start sequences
215+ matched_sequence = None
216+ matched_type = None
217+
218+ if self ._speculative_buffer .startswith (SAVE_ARTIFACT_START_SEQUENCE ):
219+ matched_sequence = SAVE_ARTIFACT_START_SEQUENCE
220+ matched_type = "save_artifact"
221+ elif self ._speculative_buffer .startswith (TEMPLATE_START_SEQUENCE ):
222+ matched_sequence = TEMPLATE_START_SEQUENCE
223+ matched_type = "template"
224+
225+ if matched_sequence :
173226 if char == "\n " :
174227 # We found the newline, the block is officially started.
228+
229+ # If we're already inside a save_artifact block and this is a template,
230+ # we need to pass it through as literal text (preserve nesting)
231+ if self ._nesting_depth > 0 and matched_type == "template" :
232+ # Preserve template literally inside artifact
233+ self ._artifact_buffer += self ._speculative_buffer
234+ # Increment nesting depth so we know to skip the next »»»
235+ # (it will close the nested template, not the outer artifact)
236+ self ._nesting_depth += 1
237+ # Don't reset state! We're still inside the save_artifact block.
238+ # Just clear the speculative buffer and stay IN_BLOCK to continue
239+ # buffering the rest of the artifact content.
240+ self ._speculative_buffer = ""
241+ self ._state = ParserState .IN_BLOCK
242+ return
243+
175244 self ._state = ParserState .IN_BLOCK
245+ self ._current_block_type = matched_type
246+ self ._nesting_depth += 1
247+
176248 # Extract the parameters string between the start sequence and the newline
177- params_str = self ._speculative_buffer [len (BLOCK_START_SEQUENCE ) : - 1 ]
249+ params_str = self ._speculative_buffer [len (matched_sequence ) : - 1 ]
178250 self ._block_params = dict (PARAMS_REGEX .findall (params_str ))
179- events .append (BlockStartedEvent (params = self ._block_params ))
251+
252+ if matched_type == "save_artifact" :
253+ events .append (BlockStartedEvent (params = self ._block_params ))
254+ elif matched_type == "template" :
255+ events .append (TemplateBlockStartedEvent (params = self ._block_params ))
256+
180257 self ._speculative_buffer = "" # Clear buffer, we are done with it.
181258 # else, we are still buffering the parameters line.
182259 return
183260
184- # If we are still building up the start sequence itself
185- if BLOCK_START_SEQUENCE .startswith (self ._speculative_buffer ):
261+ # If we are still building up a start sequence (could be either)
262+ if SAVE_ARTIFACT_START_SEQUENCE .startswith (
263+ self ._speculative_buffer
264+ ) or TEMPLATE_START_SEQUENCE .startswith (self ._speculative_buffer ):
186265 # It's still a potential match. Continue buffering.
187266 return
188267
189268 # If we've reached here, the sequence is invalid.
190269 # Rollback: The sequence was invalid.
191270 rolled_back_text = self ._speculative_buffer
192- user_text_parts .append (rolled_back_text )
193- events .append (BlockInvalidatedEvent (rolled_back_text = rolled_back_text ))
194- self ._reset_state ()
271+
272+ # Check if we were IN_BLOCK before transitioning to POTENTIAL_BLOCK
273+ # If so, add the rolled-back text to the artifact buffer, not user-visible text
274+ if self ._previous_state == ParserState .IN_BLOCK :
275+ log .debug (
276+ "[StreamParser] Invalid sequence '%s' detected while IN_BLOCK. Adding to artifact buffer." ,
277+ repr (rolled_back_text ),
278+ )
279+ self ._artifact_buffer += rolled_back_text
280+ self ._speculative_buffer = ""
281+ self ._state = ParserState .IN_BLOCK
282+ # Don't emit BlockInvalidatedEvent - this is just normal artifact content
283+ else :
284+ # We were IDLE, so this is user-facing text
285+ user_text_parts .append (rolled_back_text )
286+ events .append (BlockInvalidatedEvent (rolled_back_text = rolled_back_text ))
287+ self ._speculative_buffer = ""
288+ self ._state = ParserState .IDLE
289+
290+ self ._previous_state = None
195291
196292 def _process_in_block (self , char : str , events : List [ParserEvent ]):
197293 """State handler for when the parser is inside a block, buffering content."""
294+ # Check if this might be the start of a nested block
295+ if char == BLOCK_START_SEQUENCE [0 ]:
296+ # This might be the start of a nested template block
297+ # Transition to POTENTIAL_BLOCK to check
298+ self ._previous_state = ParserState .IN_BLOCK
299+ self ._state = ParserState .POTENTIAL_BLOCK
300+ self ._speculative_buffer += char
301+ return
302+
198303 self ._artifact_buffer += char
199304
200305 # Check for the closing delimiter
201306 if self ._artifact_buffer .endswith (ARTIFACT_BLOCK_DELIMITER_CLOSE ):
202- # Block is complete.
203- final_content = self ._artifact_buffer [
204- : - len (ARTIFACT_BLOCK_DELIMITER_CLOSE )
205- ]
206- events .append (
207- BlockCompletedEvent (params = self ._block_params , content = final_content )
208- )
209- self ._reset_state ()
210- else :
211- # Check if we should emit a progress update
212- current_size = len (self ._artifact_buffer .encode ("utf-8" ))
213- if (
214- current_size - self ._last_progress_update_size
215- ) >= self ._progress_update_interval :
216- new_chunk = self ._artifact_buffer [
217- self ._last_progress_update_size : current_size
307+ # Check if this is closing a nested block or the current block
308+ if self ._nesting_depth > 1 :
309+ # This »»» is closing a nested template block, not the outer save_artifact
310+ # Keep it in the buffer and just decrement nesting
311+ self ._nesting_depth -= 1
312+ # Don't emit events, don't strip the delimiter, just continue buffering
313+ else :
314+ # This is closing the outermost block (nesting_depth == 1)
315+ # Block is complete.
316+ final_content = self ._artifact_buffer [
317+ : - len (ARTIFACT_BLOCK_DELIMITER_CLOSE )
218318 ]
219- events .append (
220- BlockProgressedEvent (
221- params = self ._block_params ,
222- buffered_size = current_size ,
223- chunk = new_chunk ,
319+
320+ # Emit the appropriate completion event based on block type
321+ if self ._current_block_type == "template" :
322+ events .append (
323+ TemplateBlockCompletedEvent (
324+ params = self ._block_params , template_content = final_content
325+ )
224326 )
225- )
226- self ._last_progress_update_size = current_size
327+ else :
328+ # Default to save_artifact behavior
329+ events .append (
330+ BlockCompletedEvent (
331+ params = self ._block_params , content = final_content
332+ )
333+ )
334+
335+ # Decrement nesting depth
336+ self ._nesting_depth = max (0 , self ._nesting_depth - 1 )
337+ self ._reset_state ()
338+ else :
339+ # Check if we should emit a progress update (only for save_artifact blocks)
340+ if self ._current_block_type == "save_artifact" :
341+ current_size = len (self ._artifact_buffer .encode ("utf-8" ))
342+ if (
343+ current_size - self ._last_progress_update_size
344+ ) >= self ._progress_update_interval :
345+ new_chunk = self ._artifact_buffer [
346+ self ._last_progress_update_size : current_size
347+ ]
348+ events .append (
349+ BlockProgressedEvent (
350+ params = self ._block_params ,
351+ buffered_size = current_size ,
352+ chunk = new_chunk ,
353+ )
354+ )
355+ self ._last_progress_update_size = current_size
0 commit comments