@@ -384,8 +384,42 @@ def _iter_reasoning_texts(reasoning_value: Any) -> Iterable[str]:
384384 yield str (reasoning_value )
385385
386386
387+ def _is_thinking_blocks_format (reasoning_value : Any ) -> bool :
388+ """Returns True if reasoning_value is Anthropic thinking_blocks format.
389+
390+ Anthropic thinking_blocks is a list of dicts, each with 'type', 'thinking',
391+ and 'signature' keys.
392+ """
393+ if not isinstance (reasoning_value , list ) or not reasoning_value :
394+ return False
395+ first = reasoning_value [0 ]
396+ return isinstance (first , dict ) and "signature" in first
397+
398+
387399def _convert_reasoning_value_to_parts (reasoning_value : Any ) -> List [types .Part ]:
388- """Converts provider reasoning payloads into Gemini thought parts."""
400+ """Converts provider reasoning payloads into Gemini thought parts.
401+
402+ Handles Anthropic thinking_blocks (list of dicts with type/thinking/signature)
403+ by preserving the signature on each part's thought_signature field. This is
404+ required for Anthropic to maintain thinking across tool call boundaries.
405+ """
406+ if _is_thinking_blocks_format (reasoning_value ):
407+ parts : List [types .Part ] = []
408+ for block in reasoning_value :
409+ if not isinstance (block , dict ):
410+ continue
411+ block_type = block .get ("type" , "" )
412+ if block_type == "redacted" :
413+ continue
414+ thinking_text = block .get ("thinking" , "" )
415+ signature = block .get ("signature" , "" )
416+ if not thinking_text :
417+ continue
418+ part = types .Part (text = thinking_text , thought = True )
419+ if signature :
420+ part .thought_signature = signature .encode ("utf-8" )
421+ parts .append (part )
422+ return parts
389423 return [
390424 types .Part (text = text , thought = True )
391425 for text in _iter_reasoning_texts (reasoning_value )
@@ -396,12 +430,19 @@ def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
396430def _extract_reasoning_value (message : Message | Delta | None ) -> Any :
397431 """Fetches the reasoning payload from a LiteLLM message.
398432
399- Checks for both 'reasoning_content' (LiteLLM standard, used by Azure/Foundry,
400- Ollama via LiteLLM) and 'reasoning' (used by LM Studio, vLLM).
401- Prioritizes 'reasoning_content' when both are present.
433+ Checks for 'thinking_blocks' (Anthropic structured format with signatures),
434+ 'reasoning_content' (LiteLLM standard, used by Azure/Foundry, Ollama via
435+ LiteLLM) and 'reasoning' (used by LM Studio, vLLM).
436+ Prioritizes 'thinking_blocks' when present (Anthropic models), then
437+ 'reasoning_content', then 'reasoning'.
402438 """
403439 if message is None :
404440 return None
441+ # Anthropic models return thinking_blocks with type/thinking/signature fields.
442+ # This must be preserved to maintain thinking across tool call boundaries.
443+ thinking_blocks = message .get ("thinking_blocks" )
444+ if thinking_blocks is not None :
445+ return thinking_blocks
405446 reasoning_content = message .get ("reasoning_content" )
406447 if reasoning_content is not None :
407448 return reasoning_content
@@ -835,6 +876,30 @@ async def _content_to_message_param(
835876 else final_content
836877 )
837878
879+ # For Anthropic models, rebuild thinking_blocks with signatures so that
880+ # thinking is preserved across tool call boundaries. Without this,
881+ # Anthropic silently drops thinking after the first turn.
882+ if model and _is_anthropic_model (model ) and reasoning_parts :
883+ thinking_blocks = []
884+ for part in reasoning_parts :
885+ if part .text and part .thought_signature :
886+ sig = part .thought_signature
887+ if isinstance (sig , bytes ):
888+ sig = sig .decode ("utf-8" )
889+ thinking_blocks .append ({
890+ "type" : "thinking" ,
891+ "thinking" : part .text ,
892+ "signature" : sig ,
893+ })
894+ if thinking_blocks :
895+ msg = ChatCompletionAssistantMessage (
896+ role = role ,
897+ content = final_content ,
898+ tool_calls = tool_calls or None ,
899+ )
900+ msg ["thinking_blocks" ] = thinking_blocks # type: ignore[typeddict-unknown-key]
901+ return msg
902+
838903 reasoning_texts = []
839904 for part in reasoning_parts :
840905 if part .text :
@@ -1943,6 +2008,31 @@ def _build_request_log(req: LlmRequest) -> str:
19432008"""
19442009
19452010
2011+ def _is_anthropic_model (model_string : str ) -> bool :
2012+ """Check if the model is an Anthropic Claude model accessed via LiteLLM.
2013+
2014+ Detects models using the anthropic/ provider prefix, bedrock/ models that
2015+ contain 'anthropic' or 'claude', and vertex_ai/ models that contain 'claude'.
2016+
2017+ Args:
2018+ model_string: A LiteLLM model string (e.g., "anthropic/claude-4-sonnet",
2019+ "bedrock/anthropic.claude-3-5-sonnet", "vertex_ai/claude-4-sonnet")
2020+
2021+ Returns:
2022+ True if it's an Anthropic Claude model, False otherwise.
2023+ """
2024+ lower = model_string .lower ()
2025+ if lower .startswith ("anthropic/" ):
2026+ return True
2027+ if lower .startswith ("bedrock/" ):
2028+ model_part = lower .split ("/" , 1 )[1 ]
2029+ return "anthropic" in model_part or "claude" in model_part
2030+ if lower .startswith ("vertex_ai/" ):
2031+ model_part = lower .split ("/" , 1 )[1 ]
2032+ return "claude" in model_part
2033+ return False
2034+
2035+
19462036def _is_litellm_vertex_model (model_string : str ) -> bool :
19472037 """Check if the model is a Vertex AI model accessed via LiteLLM.
19482038
0 commit comments