Use stricter typing for tool calls (following kosong library) (#127)

joschu · web-flow · commit 53c6c38a2b12 · 2025-11-24T22:58:32.000-08:00
diff --git a/tinker_cookbook/recipes/tool_use/search/search_env.py b/tinker_cookbook/recipes/tool_use/search/search_env.py
@@ -164,19 +164,23 @@ async def step(self, action: Action) -> StepResult:
         self.past_messages.append(message)
 
         if "tool_calls" in message:
+            tool_calls = message["tool_calls"]
             failure_result = StepResult(
                 reward=0.0,
                 episode_done=True,
                 next_observation=tinker.ModelInput.empty(),
                 next_stop_condition=self.stop_condition,
             )
-            if message["tool_calls"][0]["name"] == "search":
+            # Check if tool_calls list is not empty
+            if not tool_calls:
+                return failure_result
+            if tool_calls[0].function.name == "search":
                 self.current_num_calls += 1
                 if self.current_num_calls > self.max_num_calls:
                     return failure_result
                 # NOTE(tianyi): seems wasteful: we should share the client somehow
                 try:
-                    tool_return_message = await self.call_search_tool(message["tool_calls"][0])
+                    tool_return_message = await self.call_search_tool(tool_calls[0])
                     self.past_messages.extend(tool_return_message)
                 except Exception as e:
                     logger.error(f"Error calling search tool: {repr(e)}")
diff --git a/tinker_cookbook/recipes/tool_use/search/tools.py b/tinker_cookbook/recipes/tool_use/search/tools.py
@@ -1,4 +1,5 @@
 import asyncio
+import json
 import logging
 from abc import ABC, abstractmethod
 from typing import Any
@@ -142,17 +143,27 @@ async def _query_chroma_with_retry(self, query_embeddings: list[list[float]]) ->
         raise RuntimeError("All ChromaDB query attempts failed")
 
     async def invoke(self, tool_call: ToolCall) -> list[Message]:
-        if tool_call["name"] != "search":
-            raise ValueError(f"Invalid tool name: {tool_call['name']}")
-        if not isinstance(tool_call["args"], dict) or "query_list" not in tool_call["args"]:
+        if tool_call.function.name != "search":
+            raise ValueError(f"Invalid tool name: {tool_call.function.name}")
+
+        # Parse arguments with error handling
+        try:
+            args = json.loads(tool_call.function.arguments)
+        except json.JSONDecodeError as e:
+            return [
+                Message(
+                    role="tool",
+                    content=f"Error invoking search tool: Invalid JSON in arguments - {str(e)}",
+                )
+            ]
+
+        query_list = args.get("query_list")
+        if not isinstance(query_list, list):
             return [
                 Message(role="tool", content="Error invoking search tool: query_list is required")
             ]
-        query_list = tool_call["args"]["query_list"]
-        if (
-            not isinstance(query_list, list)
-            or not len(query_list) > 0
-            or not all(isinstance(query, str) and len(query.strip()) > 0 for query in query_list)
+        if not query_list or not all(
+            isinstance(query, str) and query.strip() for query in query_list
         ):
             return [
                 Message(
diff --git a/tinker_cookbook/renderers.py b/tinker_cookbook/renderers.py
@@ -8,20 +8,126 @@
 import re
 from datetime import datetime
 from enum import StrEnum
-from typing import Callable, NotRequired, TypedDict
+from typing import Callable, Literal, NotRequired, TypedDict
 
 import tinker
 import torch
+import pydantic
 
 from tinker_cookbook.tokenizer_utils import Tokenizer
 
 logger = logging.getLogger(__name__)
 
+# Tool types are based on kosong (https://github.com/MoonshotAI/kosong).
 
-class ToolCall(TypedDict):
-    name: str
-    # Each argument is a stringified JSON object
-    args: dict[str, str]
+
+class StrictBase(pydantic.BaseModel):
+    """Pydantic base class that is immutable and rejects extra fields instead of ignoring them."""
+
+    # frozen=True provides the immutability; extra="forbid" turns unknown fields into errors.
+    model_config = pydantic.ConfigDict(frozen=True, extra="forbid")
+
+    def __str__(self) -> str:
+        # str() yields the same text as repr() for every subclass.
+        return repr(self)
+
+
+class ToolCall(StrictBase):
+    """
+    Structured tool invocation following OpenAI/kosong format.
+
+    This represents a request to invoke a tool/function. The structure follows
+    the OpenAI function calling format for compatibility with various LLM APIs.
+
+    Example:
+        tool_call = ToolCall(
+            function=ToolCall.FunctionBody(
+                name="search",
+                arguments='{"query_list": ["python async", "pydantic validation"]}'
+            ),
+            id="call_abc123"
+        )
+    """
+
+    class FunctionBody(StrictBase):
+        """
+        Tool call function body containing the tool name and arguments.
+
+        Derives from StrictBase so the nested body is frozen and rejects extra
+        fields too. ``arguments`` is a JSON string; it is not parsed here.
+        """
+
+        name: str
+        """The name of the tool to be called."""
+        arguments: str
+        """Arguments of the tool call in JSON string format."""
+
+    type: Literal["function"] = "function"
+    """Tool call type, must be 'function' for compatibility."""
+
+    id: str | None = None
+    """Optional unique identifier for tracking this specific tool call."""
+
+    function: FunctionBody
+    """The function body containing tool name and arguments."""
+
+
+class ToolOk(StrictBase):
+    """
+    Successful tool execution result.
+
+    Signals that a tool call completed successfully. Only ``output`` is required;
+    ``message`` and ``brief`` are optional and default to the empty string.
+    """
+
+    output: str
+    """The main output/result from the tool execution."""
+
+    message: str = ""
+    """Optional human-readable message about the execution."""
+
+    brief: str = ""
+    """Optional brief summary of the result for logging."""
+
+
+class ToolError(StrictBase):
+    """
+    Tool execution error result.
+
+    Signals that a tool call failed. Every field is optional and defaults to the
+    empty string, so a bare ``ToolError()`` is valid.
+    """
+
+    output: str = ""
+    """Any partial output that was generated before the error."""
+
+    message: str = ""
+    """Error message describing what went wrong."""
+
+    brief: str = ""
+    """Brief error summary for logging."""
+
+
+ToolReturnType = ToolOk | ToolError
+"""Union type for tool execution results - either success or error."""
+
+
+class ToolResult(StrictBase):
+    """
+    Complete tool execution result with tracking ID.
+
+    Wraps the actual result (ToolOk or ToolError) with the corresponding
+    tool call ID. ``tool_call_id`` has no default: pass None explicitly if unknown.
+
+    Note: This class is defined for future use in handling multiple
+    concurrent tool calls with result correlation.
+    """
+
+    tool_call_id: str | None
+    """ID of the tool call this result corresponds to."""
+
+    result: ToolReturnType
+    """The actual execution result (success or error)."""
 
 
 # NOTE: we use a broad type definition for the role to be flexible
@@ -35,6 +141,17 @@ class Message(TypedDict):
     tool_calls: NotRequired[list[ToolCall]]
     thinking: NotRequired[str]
     trainable: NotRequired[bool]
+    tool_call_id: NotRequired[str]
+    name: NotRequired[str]
+
+
+def _tool_call_payload(tool_call: ToolCall) -> dict[str, object]:
+    """Flatten a ToolCall into the {"name", "args"} dict embedded in <tool_call> blocks."""
+    fn = tool_call.function
+    # arguments holds a JSON string; decode it so "args" is emitted as a JSON object
+    # rather than a quoted string. The call id and type are not part of the payload.
+    payload: dict[str, object] = {"name": fn.name, "args": json.loads(fn.arguments)}
+    return payload
 
 
 class TrainOnWhat(StrEnum):
@@ -369,7 +486,7 @@ def _render_message(self, idx: int, message: Message) -> tuple[list[int], list[i
         if "tool_calls" in message:
             ac_content += "\n".join(
                 [
-                    f"<tool_call>\n{json.dumps(tool_call)}\n</tool_call>"
+                    f"<tool_call>\n{json.dumps(_tool_call_payload(tool_call))}\n</tool_call>"
                     for tool_call in message["tool_calls"]
                 ]
             )
@@ -425,15 +542,20 @@ def _parse_tool_call(self, tool_call_str: str) -> list[ToolCall] | None:
 
         if not isinstance(tool_call, dict):
             return None
-        if (
-            "name" not in tool_call
-            or "args" not in tool_call
-            or not isinstance(tool_call["name"], str)
-            or not isinstance(tool_call["args"], dict)
-        ):
+        name = tool_call.get("name")
+        args = tool_call.get("args")
+        tool_id = tool_call.get("id")
+        if not isinstance(name, str) or not isinstance(args, dict):
             return None
-
-        return [ToolCall(**tool_call)]
+        if tool_id is not None and not isinstance(tool_id, str):
+            tool_id = None
+        # Convert to nested structure with arguments as JSON string
+        return [
+            ToolCall(
+                function=ToolCall.FunctionBody(name=name, arguments=json.dumps(args)),
+                id=tool_id,
+            )
+        ]
 
     def parse_response(self, response: list[int]) -> tuple[Message, bool]:
         assistant_message, parse_success = parse_response_for_stop_token(
@@ -485,7 +607,7 @@ def _render_message(self, idx: int, message: Message) -> tuple[list[int], list[i
         if "tool_calls" in message:
             ac_content += "\n".join(
                 [
-                    f"<tool_call>\n{json.dumps(tool_call)}\n</tool_call>"
+                    f"<tool_call>\n{json.dumps(_tool_call_payload(tool_call))}\n</tool_call>"
                     for tool_call in message["tool_calls"]
                 ]
             )