diff --git a/ddev/hatch.toml b/ddev/hatch.toml index 8a07ebbf5c149..a5fa11668e95f 100644 --- a/ddev/hatch.toml +++ b/ddev/hatch.toml @@ -9,6 +9,7 @@ python = "3.13" e2e-env = false dependencies = [ "pyyaml", + "pytest-asyncio", "vcrpy", ] # TODO: remove this when the old CLI is gone diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml index 8cfbc07271cf7..33af6ea6cbbeb 100644 --- a/ddev/pyproject.toml +++ b/ddev/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "anthropic>=0.18.0", + "anthropic>=0.86.0", "click~=8.1.6", "coverage", "datadog-api-client==2.20.0", @@ -136,3 +136,6 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] #Tests can use assertions and relative imports "**/tests/**/*" = ["I252"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" diff --git a/ddev/src/ddev/ai/agent/__init__.py b/ddev/src/ddev/ai/agent/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/src/ddev/ai/agent/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/src/ddev/ai/agent/client.py b/ddev/src/ddev/ai/agent/client.py new file mode 100644 index 0000000000000..d576429015cef --- /dev/null +++ b/ddev/src/ddev/ai/agent/client.py @@ -0,0 +1,219 @@ +# (C) Datadog, Inc. 
2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from copy import deepcopy +from dataclasses import dataclass +from enum import StrEnum +from typing import Any, Final + +import anthropic +from anthropic.types import MessageParam, ToolParam, ToolResultBlockParam + +from ddev.ai.tools.core.registry import ToolRegistry + +from .exceptions import ( + AgentAPIError, + AgentConnectionError, + AgentError, + AgentRateLimitError, +) + +DEFAULT_MODEL: Final[str] = "claude-sonnet-4-6" +DEFAULT_MAX_TOKENS: Final[int] = 8192 # max tokens per response +ALLOWED_TOOL_CALLERS: Final = ["code_execution_20260120"] + + +class StopReason(StrEnum): + """Maps Anthropic API stop_reason strings to a typed enum.""" + + END_TURN = "end_turn" + MAX_TOKENS = "max_tokens" + STOP_SEQUENCE = "stop_sequence" + TOOL_USE = "tool_use" + PAUSE_TURN = "pause_turn" + REFUSAL = "refusal" + + +@dataclass(frozen=True) +class ToolCall: + """A single tool invocation requested by the model.""" + + id: str + name: str + input: dict[str, Any] + + +@dataclass(frozen=True) +class ContextUsage: + """Context window accounting for a single API call.""" + + window_size: int + used_tokens: int + + @property + def context_pct(self) -> float: + return self.used_tokens / self.window_size * 100 + + @property + def remaining_tokens(self) -> int: + return self.window_size - self.used_tokens + + +@dataclass(frozen=True) +class TokenUsage: + """Token accounting from a single API call.""" + + input_tokens: int # tokens sent to the model (system_prompt + history) + output_tokens: int # tokens the model generated + cache_read_input_tokens: int # tokens read from prompt cache + cache_creation_input_tokens: int # tokens written to prompt cache + context: ContextUsage + + +@dataclass(frozen=True) +class AgentResponse: + """The complete response from a single AnthropicAgent.send() call. 
class AnthropicAgent:
    """A wrapper around the Anthropic API that provides a simple interface for interacting with agents.

    Conversation history is kept internally and is only extended after a
    successful API call, so a failed send() leaves the conversation intact.
    """

    def __init__(
        self,
        client: anthropic.AsyncAnthropic,
        tools: ToolRegistry,
        system_prompt: str,
        name: str,
        model: str = DEFAULT_MODEL,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        programmatic_tool_calling: bool = False,
    ) -> None:
        """Initialize an AnthropicAgent.
        Args:
            client: The Anthropic client to use.
            tools: The ToolRegistry to use (might not be used in every call if allowed_tools in send() is provided)
            system_prompt: The system prompt to use.
            name: The name of the agent.
            model: The model to use.
            max_tokens: The max tokens per response.
            programmatic_tool_calling: Whether to allow programmatic tool calling.
        """

        self._client = client
        self._tools = tools
        self._system_prompt = system_prompt
        self.name = name
        self._model = model
        self._max_tokens = max_tokens
        self._programmatic_tool_calling = programmatic_tool_calling
        # Alternating user/assistant MessageParam entries; appended in pairs by send().
        self._history: list[MessageParam] = []
        # Lazily fetched from the models API and cached for the agent's lifetime.
        self._context_window: int | None = None

    @property
    def history(self) -> list[MessageParam]:
        """Read-only snapshot of the conversation history."""
        # Deep copy so callers cannot mutate internal state through the snapshot.
        return deepcopy(self._history)

    def reset(self) -> None:
        """Clear conversation history to start a new conversation."""
        self._history = []

    async def _get_context_window(self) -> int:
        # Fetch the model's max input tokens once; reuse the cached value afterwards.
        if self._context_window is None:
            info = await self._client.models.retrieve(self._model)
            self._context_window = info.max_input_tokens
        return self._context_window

    def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list[ToolParam]:
        """Filter tool definitions by allowlist. None means all tools."""
        definitions = self._tools.definitions
        if allowed_tools is not None:
            allowed = set(allowed_tools)
            definitions = [d for d in definitions if d["name"] in allowed]
        # NOTE(review): allowed_callers is attached only when programmatic tool
        # calling is DISABLED — confirm this polarity is intended; the tests do
        # not pin it.
        if not self._programmatic_tool_calling:
            definitions = [{**d, "allowed_callers": ALLOWED_TOOL_CALLERS} for d in definitions]
        return definitions

    async def send(
        self,
        content: str | list[ToolResultBlockParam],
        allowed_tools: list[str] | None = None,
    ) -> AgentResponse:
        """Send a message to the agent and return the response.
        Args:
            content: The content to send to the agent.
            allowed_tools: The tools in the ToolRegistry to allow the agent to use.
        Returns:
            An AgentResponse object containing the response from the agent.
        Raises:
            AgentConnectionError: The API was unreachable.
            AgentRateLimitError: The request was rate limited.
            AgentAPIError: The API returned an error status code.
            AgentError: Response validation failed or the stop_reason was
                missing/unknown.
        """
        tool_defs = self._get_tool_definitions(allowed_tools)

        user_msg: MessageParam = {"role": "user", "content": content}
        # Build the outgoing message list without touching self._history yet;
        # history is only committed after a successful response (see below).
        messages = [*self._history, user_msg]

        try:
            response = await self._client.messages.create(
                model=self._model,
                max_tokens=self._max_tokens,
                system=self._system_prompt,
                messages=messages,
                # Omit the tools field entirely when nothing survives filtering.
                tools=tool_defs if tool_defs else anthropic.NOT_GIVEN,
            )
        except anthropic.APIConnectionError as e:
            raise AgentConnectionError(f"Connection failed: {e}") from e
        except anthropic.RateLimitError as e:
            raise AgentRateLimitError(f"Rate limit exceeded: {e}") from e
        except anthropic.APIStatusError as e:
            raise AgentAPIError(e.status_code, e.message) from e
        except anthropic.APIResponseValidationError as e:
            raise AgentError(f"Response validation failed: {e}") from e

        # stop_reason is None only in streaming responses; we use non-streaming, so None is unexpected
        if response.stop_reason is None:
            raise AgentError("Received null stop_reason from API")

        try:
            stop_reason = StopReason(response.stop_reason)
        except ValueError as e:
            raise AgentError(f"Unknown stop_reason: {response.stop_reason!r}") from e

        text_parts: list[str] = []
        tool_calls: list[ToolCall] = []

        for block in response.content:
            if isinstance(block, anthropic.types.TextBlock):
                text_parts.append(block.text)
            elif isinstance(block, anthropic.types.ToolUseBlock):
                tool_calls.append(ToolCall(id=block.id, name=block.name, input=dict(block.input)))
            # ThinkingBlock and RedactedThinkingBlock are intentionally ignored.
            # Extended thinking support can add a `thinking: str` field to AgentResponse later.

        # Cache counters may come back as None; normalize to 0 for arithmetic.
        cache_read = response.usage.cache_read_input_tokens or 0
        cache_creation = response.usage.cache_creation_input_tokens or 0
        # Context consumption = uncached input + cache reads + cache writes.
        used_tokens = response.usage.input_tokens + cache_read + cache_creation
        usage = TokenUsage(
            input_tokens=response.usage.input_tokens,
            output_tokens=response.usage.output_tokens,
            cache_read_input_tokens=cache_read,
            cache_creation_input_tokens=cache_creation,
            context=ContextUsage(window_size=await self._get_context_window(), used_tokens=used_tokens),
        )

        agent_response = AgentResponse(
            stop_reason=stop_reason,
            text="\n".join(text_parts),
            tool_calls=tool_calls,
            usage=usage,
        )

        # Save to history only after a successful response.
        self._history.extend([user_msg, {"role": "assistant", "content": response.content}])

        return agent_response
class AgentError(Exception):
    """Base exception for every failure raised by AnthropicAgent."""


class AgentConnectionError(AgentError):
    """The Anthropic API could not be reached over the network."""


class AgentRateLimitError(AgentError):
    """The request was rate limited; it may be retried after a delay."""


class AgentAPIError(AgentError):
    """The API answered with an error status code.

    Attributes:
        status_code: HTTP status code returned by the API.
    """

    def __init__(self, status_code: int, message: str) -> None:
        super().__init__(message)
        self.status_code = status_code
- Each tool definition dict is not mutated, but a new dict is returned with the allowed_callers key added.""" - return [{**tool.definition, "allowed_callers": ALLOWED_TOOL_CALLERS} for tool in self._tools.values()] + """Return Anthropic SDK tool definitions for all registered tools.""" + return [tool.definition for tool in self._tools.values()] async def run(self, name: str, raw: dict[str, object]) -> ToolResult: """Execute a tool by name, returning an error result if not found.""" diff --git a/ddev/tests/ai/agent/__init__.py b/ddev/tests/ai/agent/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/tests/ai/agent/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/ai/agent/test_client.py b/ddev/tests/ai/agent/test_client.py new file mode 100644 index 0000000000000..f4d1b9f5e8c96 --- /dev/null +++ b/ddev/tests/ai/agent/test_client.py @@ -0,0 +1,453 @@ +# (C) Datadog, Inc. 
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def make_usage(
    input_tokens: int = 10,
    output_tokens: int = 20,
    cache_read: int | None = None,
    cache_creation: int | None = None,
) -> SimpleNamespace:
    """Fake of the Anthropic ``usage`` payload with optional cache counters."""
    fields = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cache_read_input_tokens": cache_read,
        "cache_creation_input_tokens": cache_creation,
    }
    return SimpleNamespace(**fields)


def make_text_block(text: str) -> anthropic.types.TextBlock:
    """Real SDK TextBlock carrying *text*."""
    return anthropic.types.TextBlock(type="text", text=text)


def make_tool_use_block(
    id: str = "toolu_01",
    name: str = "read_file",
    input: dict | None = None,
) -> anthropic.types.ToolUseBlock:
    """Real SDK ToolUseBlock; defaults to a read_file call on /tmp/file.txt."""
    effective_input = input or {"path": "/tmp/file.txt"}
    return anthropic.types.ToolUseBlock(
        type="tool_use",
        id=id,
        name=name,
        input=effective_input,
    )


def make_response(
    stop_reason: str | None,
    content: list,
    usage: SimpleNamespace | None = None,
) -> SimpleNamespace:
    """Fake non-streaming Messages API response; usage defaults via make_usage()."""
    return SimpleNamespace(
        stop_reason=stop_reason,
        content=content,
        usage=usage or make_usage(),
    )


FAKE_CONTEXT_WINDOW = 200_000
make_response("end_turn", [])) + client.models = MagicMock() + client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) + registry = tools or ToolRegistry([]) + agent = AnthropicAgent( + client=client, + tools=registry, + system_prompt="You are helpful.", + name="test-agent", + ) + return agent, client.messages.create + + +# --------------------------------------------------------------------------- +# end_turn with a single TextBlock +# --------------------------------------------------------------------------- + + +async def test_end_turn_single_text_block() -> None: + content = [make_text_block("Hello!")] + resp = make_response("end_turn", content) + agent, _ = make_agent(mock_response=resp) + + result = await agent.send("Hi") + + assert result.stop_reason is StopReason.END_TURN + assert result.text == "Hello!" + assert result.tool_calls == [] + assert len(agent.history) == 2 + assert agent.history[0] == {"role": "user", "content": "Hi"} + assert agent.history[1] == {"role": "assistant", "content": content} + + +# --------------------------------------------------------------------------- +# tool_use +# --------------------------------------------------------------------------- + + +async def test_tool_use_single_block() -> None: + block = make_tool_use_block(id="toolu_42", name="read_file", input={"path": "/etc/hosts"}) + resp = make_response("tool_use", [block]) + agent, _ = make_agent(mock_response=resp) + + result = await agent.send("Read hosts") + + assert result.stop_reason is StopReason.TOOL_USE + assert len(result.tool_calls) == 1 + tc = result.tool_calls[0] + assert tc.id == "toolu_42" + assert tc.name == "read_file" + assert tc.input == {"path": "/etc/hosts"} + + +# --------------------------------------------------------------------------- +# mixed TextBlock + ToolUseBlock +# --------------------------------------------------------------------------- + + +async def test_mixed_text_and_tool_use() -> None: + 
# ---------------------------------------------------------------------------
# Multiple TextBlocks are concatenated
# ---------------------------------------------------------------------------


async def test_multiple_text_blocks_are_concatenated() -> None:
    # AgentResponse.text joins consecutive text blocks with a newline.
    content = [make_text_block("Hello, "), make_text_block("world!")]
    resp = make_response("end_turn", content)
    agent, _ = make_agent(mock_response=resp)

    result = await agent.send("Hi")

    assert result.text == "Hello, \nworld!"


# ---------------------------------------------------------------------------
# max_tokens is a normal response (not an error)
# ---------------------------------------------------------------------------


async def test_max_tokens_is_not_an_error() -> None:
    resp = make_response("max_tokens", [make_text_block("Truncated...")])
    agent, _ = make_agent(mock_response=resp)

    result = await agent.send("Tell me everything")

    assert result.stop_reason is StopReason.MAX_TOKENS
    # A truncated turn still counts as success, so history gains the usual pair.
    assert len(agent.history) == 2


# ---------------------------------------------------------------------------
# allowed_tools filtering
# ---------------------------------------------------------------------------


class FakeTool:
    """Minimal stand-in matching the ToolProtocol surface for registry tests."""

    def __init__(self, name: str) -> None:
        self._name = name

    @property
    def name(self) -> str:
        return self._name

    @property
    def description(self) -> str:
        return ""

    @property
    def definition(self) -> dict:
        # Shape mirrors an Anthropic ToolParam; only `name` matters to these tests.
        return {"name": self._name, "description": "", "input_schema": {}}

    async def run(self, raw: dict) -> ToolResult:
        # Stub: never invoked by the allowed_tools tests. Returns None despite
        # the ToolResult annotation — fine for a stub, but do not call it.
        pass
None: + registry = ToolRegistry([FakeTool(n) for n in ["read_file", "grep", "mkdir"]]) + resp = make_response("end_turn", [make_text_block("ok")]) + agent, create_mock = make_agent(tools=registry, mock_response=resp) + + await agent.send("Hi", allowed_tools=["read_file"]) + + sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]] + assert sent_names == ["read_file"] + + +async def test_allowed_tools_none_passes_all() -> None: + registry = ToolRegistry([FakeTool(n) for n in ["a", "b"]]) + resp = make_response("end_turn", [make_text_block("ok")]) + agent, create_mock = make_agent(tools=registry, mock_response=resp) + + await agent.send("Hi", allowed_tools=None) + + sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]] + assert sent_names == ["a", "b"] + + +@pytest.mark.parametrize("allowed_tools", [[], ["nonexistent_tool"]]) +async def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, create_mock = make_agent(mock_response=resp) + + await agent.send("Hi", allowed_tools=allowed_tools) + + assert create_mock.call_args.kwargs["tools"] is anthropic.NOT_GIVEN + + +# --------------------------------------------------------------------------- +# API errors map to the correct AgentError subclass +# --------------------------------------------------------------------------- + + +def _make_error_agent(side_effect: Exception) -> AnthropicAgent: + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock(side_effect=side_effect) + return AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + +async def test_connection_error_maps_to_agent_connection_error() -> None: + agent = _make_error_agent(anthropic.APIConnectionError(request=MagicMock())) + + with pytest.raises(AgentConnectionError) as exc_info: + await agent.send("Hi") + + assert "Connection failed" in 
str(exc_info.value) + assert agent.history == [] + + +async def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None: + agent = _make_error_agent( + anthropic.RateLimitError( + message="rate limit", + response=MagicMock(status_code=429, headers={}), + body=None, + ) + ) + + with pytest.raises(AgentRateLimitError) as exc_info: + await agent.send("Hi") + + assert "Rate limit exceeded" in str(exc_info.value) + assert agent.history == [] + + +async def test_api_status_error_maps_to_agent_api_error() -> None: + agent = _make_error_agent( + anthropic.APIStatusError( + message="internal server error", + response=MagicMock(status_code=500), + body=None, + ) + ) + + with pytest.raises(AgentAPIError) as exc_info: + await agent.send("Hi") + + assert exc_info.value.status_code == 500 + assert agent.history == [] + + +async def test_response_validation_error_maps_to_agent_error() -> None: + agent = _make_error_agent(anthropic.APIResponseValidationError(response=MagicMock(), body=None)) + + with pytest.raises(AgentError) as exc_info: + await agent.send("Hi") + + assert "Response validation failed" in str(exc_info.value) + assert agent.history == [] + + +# --------------------------------------------------------------------------- +# Unknown stop_reason raises AgentError, history unchanged +# --------------------------------------------------------------------------- + + +async def test_unknown_stop_reason_raises_agent_error() -> None: + resp = make_response("totally_unknown_reason", []) + agent, _ = make_agent(mock_response=resp) + + with pytest.raises(AgentError) as exc_info: + await agent.send("Hi") + + assert agent.history == [] + assert "Unknown stop_reason" in str(exc_info.value) + assert "totally_unknown_reason" in str(exc_info.value) + + +# --------------------------------------------------------------------------- +# cache_read_input_tokens=None defaults to 0 +# --------------------------------------------------------------------------- + + +async def 
# ---------------------------------------------------------------------------
# ContextUsage fields
# ---------------------------------------------------------------------------


async def test_context_usage_fields() -> None:
    """Context accounting sums uncached input, cache reads and cache writes."""
    usage = make_usage(input_tokens=1000, cache_read=500, cache_creation=200)
    resp = make_response("end_turn", [make_text_block("ok")], usage=usage)
    agent, _ = make_agent(mock_response=resp)

    result = await agent.send("Hi")

    ctx = result.usage.context
    assert ctx.window_size == FAKE_CONTEXT_WINDOW
    assert ctx.used_tokens == 1700  # 1000 + 500 + 200
    assert ctx.context_pct == pytest.approx(1700 / FAKE_CONTEXT_WINDOW * 100)
    assert ctx.remaining_tokens == FAKE_CONTEXT_WINDOW - 1700


# ---------------------------------------------------------------------------
# context_window is fetched once and cached across multiple sends
# ---------------------------------------------------------------------------


async def test_context_window_fetched_once() -> None:
    """models.retrieve is awaited once; the window is cached for later sends."""
    resp = make_response("end_turn", [make_text_block("ok")])
    # make_agent already wires messages.create to return `resp`, so no extra
    # mock re-assignment is needed here.
    agent, _ = make_agent(mock_response=resp)

    await agent.send("First")
    await agent.send("Second")

    # Reaching into the private client mock is acceptable in tests to
    # observe the caching behavior.
    agent._client.models.retrieve.assert_awaited_once()
make_response("tool_use", [make_tool_use_block(id="toolu_01")]) + text_resp = make_response("end_turn", [make_text_block("Done.")]) + + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock(side_effect=[tool_resp, text_resp]) + client.models = MagicMock() + client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) + agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + first = await agent.send("Do X") + assert first.stop_reason is StopReason.TOOL_USE + assert len(agent.history) == 2 + + tool_results = [{"type": "tool_result", "tool_use_id": "toolu_01", "content": "result"}] + second = await agent.send(tool_results) + assert second.stop_reason is StopReason.END_TURN + assert len(agent.history) == 4 + assert agent.history[2]["role"] == "user" + assert agent.history[3]["role"] == "assistant" + + +# --------------------------------------------------------------------------- +# history property returns a copy +# --------------------------------------------------------------------------- + + +async def test_history_property_returns_copy() -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, _ = make_agent(mock_response=resp) + await agent.send("Hi") + + snapshot = agent.history + snapshot.clear() + + assert len(agent.history) == 2 + + +# --------------------------------------------------------------------------- +# reset() clears history +# --------------------------------------------------------------------------- + + +async def test_reset_clears_history() -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, _ = make_agent(mock_response=resp) + await agent.send("Hi") + assert len(agent.history) == 2 + + agent.reset() + assert agent.history == [] + + +# --------------------------------------------------------------------------- +# Error mid-conversation leaves history 
unchanged +# --------------------------------------------------------------------------- + + +async def test_error_mid_conversation_leaves_history_unchanged() -> None: + ok_resp = make_response("end_turn", [make_text_block("ok")]) + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock( + side_effect=[ + ok_resp, + anthropic.APIConnectionError(request=MagicMock()), + ] + ) + client.models = MagicMock() + client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) + agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + await agent.send("First message") + history_after_first = agent.history[:] + + with pytest.raises(AgentConnectionError): + await agent.send("Second message") + + assert agent.history == history_after_first diff --git a/ddev/tests/ai/tools/core/test_base.py b/ddev/tests/ai/tools/core/test_base.py index 96cd0f8b07d0c..35e94f750a69e 100644 --- a/ddev/tests/ai/tools/core/test_base.py +++ b/ddev/tests/ai/tools/core/test_base.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from typing import Annotated import pytest @@ -194,8 +193,8 @@ async def __call__(self, tool_input: SimpleInput) -> ToolResult: # --- run(): happy path --- -def test_run_valid_input_returns_success(echo_tool: EchoTool): - result = asyncio.run(echo_tool.run({"message": "hello"})) +async def test_run_valid_input_returns_success(echo_tool: EchoTool): + result = await echo_tool.run({"message": "hello"}) assert result.success is True assert result.data == "hello" @@ -210,8 +209,8 @@ def test_run_valid_input_returns_success(echo_tool: EchoTool): {"message": "hi", "extra": "oops"}, ], ) -def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict): - result = asyncio.run(echo_tool.run(raw)) +async def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict): + result = await echo_tool.run(raw) assert result.success is False assert result.error is not None @@ -219,8 +218,8 @@ def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict): # --- run(): __call__ exception handling --- -def test_run_captures_exception_from_call(failing_tool: FailingTool): - result = asyncio.run(failing_tool.run({"message": "boom"})) +async def test_run_captures_exception_from_call(failing_tool: FailingTool): + result = await failing_tool.run({"message": "boom"}) assert isinstance(result, ToolResult) assert result.success is False assert "RuntimeError" in result.error diff --git a/ddev/tests/ai/tools/core/test_registry.py b/ddev/tests/ai/tools/core/test_registry.py index fdd42714b6ed4..1366a9d8b5be8 100644 --- a/ddev/tests/ai/tools/core/test_registry.py +++ b/ddev/tests/ai/tools/core/test_registry.py @@ -1,11 +1,10 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio import pytest -from ddev.ai.tools.core.registry import ALLOWED_TOOL_CALLERS, ToolRegistry +from ddev.ai.tools.core.registry import ToolRegistry from ddev.ai.tools.core.types import ToolResult # --------------------------------------------------------------------------- @@ -76,8 +75,6 @@ def test_empty_registry_returns_empty_list(): def test_tool_registry_definitions_returns_all_tool_definitions(): registry = ToolRegistry([FakeTool("a"), FakeTool("b")]) assert len(registry.definitions) == 2 - for defn in registry.definitions: - assert defn["allowed_callers"] == ALLOWED_TOOL_CALLERS def test_definition_contains_tool_name(): @@ -90,41 +87,41 @@ def test_definition_contains_tool_name(): # --------------------------------------------------------------------------- -def test_run_dispatches_to_correct_tool(): +async def test_run_dispatches_to_correct_tool(): tool_a = FakeTool("a", ToolResult(success=True, data="from a")) tool_b = FakeTool("b", ToolResult(success=True, data="from b")) registry = ToolRegistry([tool_a, tool_b]) - result = asyncio.run(registry.run("b", {})) + result = await registry.run("b", {}) assert result.success is True assert result.data == "from b" -def test_passes_raw_dict_to_tool_unchanged(): +async def test_passes_raw_dict_to_tool_unchanged(): tool = FakeTool("t") registry = ToolRegistry([tool]) raw = {"key": "value", "num": 42} - asyncio.run(registry.run("t", raw)) + await registry.run("t", raw) assert tool.last_raw == raw -def test_returns_tool_result_on_tool_failure(): +async def test_returns_tool_result_on_tool_failure(): registry = ToolRegistry([FakeTool("t", ToolResult(success=False, error="bad input"))]) - result = asyncio.run(registry.run("t", {})) + result = await registry.run("t", {}) assert result.success is False assert result.error == "bad input" -def test_unknown_tool_returns_failure(): +async def 
test_unknown_tool_returns_failure(): registry = ToolRegistry([FakeTool("known_tool")]) - result = asyncio.run(registry.run("unknown_tool", {})) + result = await registry.run("unknown_tool", {}) assert result.success is False assert "Unknown tool: 'unknown_tool'" in result.error -def test_empty_registry_always_returns_unknown_error(): +async def test_empty_registry_always_returns_unknown_error(): registry = ToolRegistry([]) - result = asyncio.run(registry.run("anything", {})) + result = await registry.run("anything", {}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/conftest.py b/ddev/tests/ai/tools/fs/conftest.py index 8d6677b98c398..12ae9e34eb1d5 100644 --- a/ddev/tests/ai/tools/fs/conftest.py +++ b/ddev/tests/ai/tools/fs/conftest.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio import pytest @@ -38,8 +37,8 @@ def append_tool(registry: FileRegistry) -> AppendFileTool: @pytest.fixture -def known_file(tmp_path, create_tool: CreateFileTool): +async def known_file(tmp_path, create_tool: CreateFileTool): """A temp file registered in the registry via create.""" f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "line one\nline two\nline three\n"})) + await create_tool.run({"path": str(f), "content": "line one\nline two\nline three\n"}) return f diff --git a/ddev/tests/ai/tools/fs/test_append_file.py b/ddev/tests/ai/tools/fs/test_append_file.py index 2b669572d30bb..289142e378191 100644 --- a/ddev/tests/ai/tools/fs/test_append_file.py +++ b/ddev/tests/ai/tools/fs/test_append_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch import pytest @@ -23,8 +22,10 @@ def test_tool_name(registry: FileRegistry) -> None: ("A\r\nB\r\n", "A\nB\n", "\r"), ], ) -def test_append_file_success(append_tool: AppendFileTool, known_file, content, expected_in, expected_not_in) -> None: - result = asyncio.run(append_tool.run({"path": str(known_file), "content": content})) +async def test_append_file_success( + append_tool: AppendFileTool, known_file, content, expected_in, expected_not_in +) -> None: + result = await append_tool.run({"path": str(known_file), "content": content}) assert result.success is True text = known_file.read_text(encoding="utf-8") @@ -33,11 +34,11 @@ def test_append_file_success(append_tool: AppendFileTool, known_file, content, e assert expected_not_in not in text -def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tmp_path) -> None: +async def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tmp_path) -> None: f = tmp_path / "unread.txt" f.write_text("content", encoding="utf-8") - result = asyncio.run(append_tool.run({"path": str(f), "content": "more"})) + result = await append_tool.run({"path": str(f), "content": "more"}) assert result.success is False assert "Not authorized" in result.error @@ -50,39 +51,39 @@ def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tm ("", "first line", "first line"), ], ) -def test_append_file_separator( +async def test_append_file_separator( append_tool: AppendFileTool, create_tool: CreateFileTool, tmp_path, initial, appended, expected ) -> None: f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": initial})) + await create_tool.run({"path": str(f), "content": initial}) - result = asyncio.run(append_tool.run({"path": str(f), "content": appended})) + result = await append_tool.run({"path": str(f), "content": 
appended}) assert result.success is True assert f.read_text(encoding="utf-8") == expected -def test_append_file_fails_if_file_changed_externally(append_tool: AppendFileTool, known_file) -> None: +async def test_append_file_fails_if_file_changed_externally(append_tool: AppendFileTool, known_file) -> None: known_file.write_text("externally modified\n", encoding="utf-8") - result = asyncio.run(append_tool.run({"path": str(known_file), "content": "more"})) + result = await append_tool.run({"path": str(known_file), "content": "more"}) assert result.success is False assert "Re-read and retry" in result.error -def test_append_file_updates_registry(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: - asyncio.run(append_tool.run({"path": str(known_file), "content": "extra\n"})) +async def test_append_file_updates_registry(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: + await append_tool.run({"path": str(known_file), "content": "extra\n"}) new_content = known_file.read_text(encoding="utf-8") assert registry.verify(str(known_file), new_content) is True -def test_append_file_oserror_on_write(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: +async def test_append_file_oserror_on_write(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: original_content = known_file.read_text(encoding="utf-8") with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(append_tool.run({"path": str(known_file), "content": "new line"})) + result = await append_tool.run({"path": str(known_file), "content": "new line"}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/test_create_file.py b/ddev/tests/ai/tools/fs/test_create_file.py index 2714ef5bb06aa..8b0c0296fa38a 100644 --- a/ddev/tests/ai/tools/fs/test_create_file.py +++ b/ddev/tests/ai/tools/fs/test_create_file.py @@ -1,7 +1,6 @@ # (C) Datadog, 
Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch from ddev.ai.tools.fs.create_file import CreateFileTool @@ -12,41 +11,41 @@ def test_tool_name(registry: FileRegistry) -> None: assert CreateFileTool(registry).name == "create_file" -def test_create_file_success(create_tool: CreateFileTool, tmp_path) -> None: +async def test_create_file_success(create_tool: CreateFileTool, tmp_path) -> None: f = tmp_path / "new.txt" - result = asyncio.run(create_tool.run({"path": str(f), "content": "hello"})) + result = await create_tool.run({"path": str(f), "content": "hello"}) assert result.success is True assert f.read_text(encoding="utf-8") == "hello" -def test_create_file_default_empty_content(create_tool: CreateFileTool, tmp_path) -> None: +async def test_create_file_default_empty_content(create_tool: CreateFileTool, tmp_path) -> None: f = tmp_path / "empty.txt" - result = asyncio.run(create_tool.run({"path": str(f)})) + result = await create_tool.run({"path": str(f)}) assert result.success is True assert f.read_text(encoding="utf-8") == "" -def test_create_file_creates_missing_parent_dirs(create_tool: CreateFileTool, tmp_path) -> None: +async def test_create_file_creates_missing_parent_dirs(create_tool: CreateFileTool, tmp_path) -> None: f = tmp_path / "a" / "b" / "c" / "file.txt" - result = asyncio.run(create_tool.run({"path": str(f), "content": "nested"})) + result = await create_tool.run({"path": str(f), "content": "nested"}) assert result.success is True assert f.exists() assert f.read_text(encoding="utf-8") == "nested" -def test_create_file_fails_if_file_already_exists( +async def test_create_file_fails_if_file_already_exists( create_tool: CreateFileTool, registry: FileRegistry, tmp_path ) -> None: f = tmp_path / "existing.txt" f.write_text("original", encoding="utf-8") - result = asyncio.run(create_tool.run({"path": str(f), "content": "new"})) + result = await 
create_tool.run({"path": str(f), "content": "new"}) assert result.success is False assert result.error is not None @@ -54,19 +53,19 @@ def test_create_file_fails_if_file_already_exists( assert not registry.is_known(str(f)) -def test_create_tool_registers_in_registry(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_create_tool_registers_in_registry(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "hi"})) + await create_tool.run({"path": str(f), "content": "hi"}) assert registry.is_known(str(f)) is True assert registry.verify(str(f), "hi") is True -def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "a" / "b" / "new.txt" with patch("pathlib.Path.mkdir", side_effect=PermissionError("permission denied")): - result = asyncio.run(create_tool.run({"path": str(f), "content": "hi"})) + result = await create_tool.run({"path": str(f), "content": "hi"}) assert result.success is False assert result.error is not None @@ -74,11 +73,11 @@ def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: Fil assert not registry.is_known(str(f)) -def test_create_file_oserror_on_write(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_create_file_oserror_on_write(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "new.txt" with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(create_tool.run({"path": str(f), "content": "hi"})) + result = await create_tool.run({"path": str(f), "content": "hi"}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/test_edit_file.py 
b/ddev/tests/ai/tools/fs/test_edit_file.py index cbfd48a78c193..27c8b87cedce2 100644 --- a/ddev/tests/ai/tools/fs/test_edit_file.py +++ b/ddev/tests/ai/tools/fs/test_edit_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch import pytest @@ -15,8 +14,8 @@ def test_tool_name(registry: FileRegistry) -> None: assert EditFileTool(registry).name == "edit_file" -def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None: - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line two", "new_string": "line TWO"})) +async def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None: + result = await edit_tool.run({"path": str(known_file), "old_string": "line two", "new_string": "line TWO"}) assert result.success is True content = known_file.read_text(encoding="utf-8") @@ -24,54 +23,56 @@ def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None: assert "line two" not in content -def test_edit_file_deletes_line(edit_tool: EditFileTool, known_file) -> None: - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line two\n", "new_string": ""})) +async def test_edit_file_deletes_line(edit_tool: EditFileTool, known_file) -> None: + result = await edit_tool.run({"path": str(known_file), "old_string": "line two\n", "new_string": ""}) assert result.success is True assert "line two" not in known_file.read_text(encoding="utf-8") -def test_edit_file_fails_for_unregistered_file(edit_tool: EditFileTool, tmp_path) -> None: +async def test_edit_file_fails_for_unregistered_file(edit_tool: EditFileTool, tmp_path) -> None: f = tmp_path / "unread.txt" f.write_text("content", encoding="utf-8") - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "content", "new_string": "new"})) + result = await edit_tool.run({"path": str(f), "old_string": 
"content", "new_string": "new"}) assert result.success is False assert "Not authorized" in result.error @pytest.mark.parametrize("old_string", ["does not exist", ""]) -def test_edit_file_fails_if_old_string_not_found_or_empty(edit_tool: EditFileTool, known_file, old_string) -> None: - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": old_string, "new_string": "x"})) +async def test_edit_file_fails_if_old_string_not_found_or_empty( + edit_tool: EditFileTool, known_file, old_string +) -> None: + result = await edit_tool.run({"path": str(known_file), "old_string": old_string, "new_string": "x"}) assert result.success is False -def test_edit_file_fails_if_old_string_ambiguous( +async def test_edit_file_fails_if_old_string_ambiguous( edit_tool: EditFileTool, create_tool: CreateFileTool, tmp_path ) -> None: f = tmp_path / "dup.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "foo\nfoo\nfoo\n"})) + await create_tool.run({"path": str(f), "content": "foo\nfoo\nfoo\n"}) - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "foo", "new_string": "bar"})) + result = await edit_tool.run({"path": str(f), "old_string": "foo", "new_string": "bar"}) assert result.success is False assert "3" in result.error assert result.hint is not None -def test_edit_file_fails_if_file_changed_externally(edit_tool: EditFileTool, known_file) -> None: +async def test_edit_file_fails_if_file_changed_externally(edit_tool: EditFileTool, known_file) -> None: known_file.write_text("externally modified\n", encoding="utf-8") - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"})) + result = await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"}) assert result.success is False assert "Re-read and retry" in result.error -def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: - asyncio.run(edit_tool.run({"path": 
str(known_file), "old_string": "line one", "new_string": "LINE ONE"})) +async def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: + await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "LINE ONE"}) new_content = known_file.read_text(encoding="utf-8") assert registry.verify(str(known_file), new_content) is True @@ -85,23 +86,23 @@ def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegis ("line one\n", "line one", "A\r\nB", "A\nB\n"), # CRLF in new_string ], ) -def test_edit_file_normalizes_crlf( +async def test_edit_file_normalizes_crlf( edit_tool: EditFileTool, create_tool: CreateFileTool, tmp_path, file_content, old_string, new_string, expected ) -> None: f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": file_content})) + await create_tool.run({"path": str(f), "content": file_content}) - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": old_string, "new_string": new_string})) + result = await edit_tool.run({"path": str(f), "old_string": old_string, "new_string": new_string}) assert result.success is True assert f.read_text(encoding="utf-8") == expected -def test_edit_file_oserror_on_write(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: +async def test_edit_file_oserror_on_write(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: original_content = known_file.read_text(encoding="utf-8") with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"})) + result = await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/test_read_file.py b/ddev/tests/ai/tools/fs/test_read_file.py index 
f1b8da06d91ed..f2497e6c09a18 100644 --- a/ddev/tests/ai/tools/fs/test_read_file.py +++ b/ddev/tests/ai/tools/fs/test_read_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch import pytest @@ -14,47 +13,47 @@ def test_tool_name(registry: FileRegistry) -> None: assert ReadFileTool(registry).name == "read_file" -def test_read_file_success(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_success(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "config.txt" f.write_text("hello\nworld\n", encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is True assert result.data == "0: hello\n1: world\n" -def test_read_registers_unknown_file(read_tool: ReadFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_read_registers_unknown_file(read_tool: ReadFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "file.txt" f.write_text("content", encoding="utf-8") - asyncio.run(read_tool.run({"path": str(f)})) + await read_tool.run({"path": str(f)}) assert registry.is_known(str(f)) is True -def test_read_file_missing_file(read_tool: ReadFileTool, tmp_path) -> None: - result = asyncio.run(read_tool.run({"path": str(tmp_path / "ghost.txt")})) +async def test_read_file_missing_file(read_tool: ReadFileTool, tmp_path) -> None: + result = await read_tool.run({"path": str(tmp_path / "ghost.txt")}) assert result.success is False assert str(tmp_path / "ghost.txt") in result.error -def test_read_file_permission_error(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_permission_error(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "secret.txt" f.write_text("secret", encoding="utf-8") with patch("pathlib.Path.read_text", side_effect=PermissionError("permission denied")): - result = 
asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is False assert result.error is not None -def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "binary.bin" f.write_bytes(b"\xff\xfe\x00binary") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is False assert result.error is not None @@ -71,23 +70,23 @@ def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None: (100, None, ""), # offset beyond EOF ], ) -def test_read_file_with_offset_and_limit(read_tool: ReadFileTool, tmp_path, offset, limit, expected) -> None: +async def test_read_file_with_offset_and_limit(read_tool: ReadFileTool, tmp_path, offset, limit, expected) -> None: f = tmp_path / "file.txt" f.write_text("a\nb\nc\n", encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f), "offset": offset, "limit": limit})) + result = await read_tool.run({"path": str(f), "offset": offset, "limit": limit}) assert result.success is True assert result.data == expected -def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None: from ddev.ai.tools.core.truncation import MAX_CHARS f = tmp_path / "large.txt" f.write_text("x" * (MAX_CHARS + 1000), encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is True assert result.truncated is True @@ -95,11 +94,11 @@ def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None: assert result.hint is not None -def test_read_file_no_trailing_newline(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_no_trailing_newline(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "file.txt" 
f.write_text("no newline at end", encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is True assert result.data == "0: no newline at end" diff --git a/ddev/tests/ai/tools/fs/test_workflow.py b/ddev/tests/ai/tools/fs/test_workflow.py index 077f63189bf91..a45ad9d937e26 100644 --- a/ddev/tests/ai/tools/fs/test_workflow.py +++ b/ddev/tests/ai/tools/fs/test_workflow.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from ddev.ai.tools.fs.append_file import AppendFileTool from ddev.ai.tools.fs.create_file import CreateFileTool @@ -10,7 +9,7 @@ from ddev.ai.tools.fs.read_file import ReadFileTool -def test_workflow_create_read_edit_append( +async def test_workflow_create_read_edit_append( create_tool: CreateFileTool, read_tool: ReadFileTool, edit_tool: EditFileTool, @@ -21,20 +20,20 @@ def test_workflow_create_read_edit_append( f = tmp_path / "workflow.txt" # Step 1: create - r = asyncio.run(create_tool.run({"path": str(f), "content": "version: 1\n"})) + r = await create_tool.run({"path": str(f), "content": "version: 1\n"}) assert r.success is True # Step 2: read (registers current content) - r = asyncio.run(read_tool.run({"path": str(f)})) + r = await read_tool.run({"path": str(f)}) assert r.success is True # Step 3: edit - r = asyncio.run(edit_tool.run({"path": str(f), "old_string": "version: 1", "new_string": "version: 2"})) + r = await edit_tool.run({"path": str(f), "old_string": "version: 1", "new_string": "version: 2"}) assert r.success is True assert "version: 2" in f.read_text(encoding="utf-8") # Step 4: append - r = asyncio.run(append_tool.run({"path": str(f), "content": "# updated\n"})) + r = await append_tool.run({"path": str(f), "content": "# updated\n"}) assert r.success is True assert f.read_text(encoding="utf-8").endswith("# updated\n") @@ -42,22 +41,22 @@ def 
test_workflow_create_read_edit_append( assert registry.verify(str(f), f.read_text(encoding="utf-8")) is True -def test_workflow_stale_file( +async def test_workflow_stale_file( create_tool: CreateFileTool, read_tool: ReadFileTool, edit_tool: EditFileTool, tmp_path, ) -> None: f = tmp_path / "shared.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "original\n"})) + await create_tool.run({"path": str(f), "content": "original\n"}) f.write_text("updated externally\n", encoding="utf-8") - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "original", "new_string": "my edit"})) + result = await edit_tool.run({"path": str(f), "old_string": "original", "new_string": "my edit"}) assert result.success is False assert "Re-read and retry" in result.error - asyncio.run(read_tool.run({"path": str(f)})) + await read_tool.run({"path": str(f)}) - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "updated externally", "new_string": "final"})) + result = await edit_tool.run({"path": str(f), "old_string": "updated externally", "new_string": "final"}) assert result.success is True assert f.read_text(encoding="utf-8") == "final\n" diff --git a/ddev/tests/ai/tools/http/test_http_get.py b/ddev/tests/ai/tools/http/test_http_get.py index d2e8c06220fa1..2cb871bdfd62a 100644 --- a/ddev/tests/ai/tools/http/test_http_get.py +++ b/ddev/tests/ai/tools/http/test_http_get.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import AsyncMock, MagicMock, patch import httpx @@ -51,8 +50,8 @@ def test_tool_meta(http_tool: HttpGetTool) -> None: @pytest.mark.parametrize("url", ["ftp://example.com", "example.com", "", "//example.com"]) -def test_invalid_url(http_tool: HttpGetTool, url: str) -> None: - result = asyncio.run(http_tool.run({"url": url})) +async def test_invalid_url(http_tool: HttpGetTool, url: str) -> None: + result = await http_tool.run({"url": url}) assert result.success is False assert "http" in result.error and "https" in result.error @@ -71,9 +70,9 @@ def test_invalid_url(http_tool: HttpGetTool, url: str) -> None: (204, ""), ], ) -def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> None: +async def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> None: with patch_httpx(fake_response(status_code, body)): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is True assert f"Status: {status_code}" in result.data @@ -81,9 +80,9 @@ def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> @pytest.mark.parametrize("status_code", [400, 404, 500, 503]) -def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> None: +async def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> None: with patch_httpx(fake_response(status_code, "error body")): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is True assert f"Status: {status_code}" in result.data @@ -94,17 +93,17 @@ def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> # 
--------------------------------------------------------------------------- -def test_request_timeout(http_tool: HttpGetTool) -> None: +async def test_request_timeout(http_tool: HttpGetTool) -> None: with patch_httpx(side_effect=httpx.TimeoutException("timed out")): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics", "timeout": 1.0})) + result = await http_tool.run({"url": "http://localhost:9090/metrics", "timeout": 1.0}) assert result.success is False assert "timed out after 1.0s" in result.error -def test_request_error(http_tool: HttpGetTool) -> None: +async def test_request_error(http_tool: HttpGetTool) -> None: with patch_httpx(side_effect=httpx.RequestError("connection refused")): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is False assert "Request failed" in result.error @@ -116,12 +115,12 @@ def test_request_error(http_tool: HttpGetTool) -> None: @pytest.mark.parametrize("status_code", [200, 500]) -def test_response_truncated(http_tool: HttpGetTool, status_code: int) -> None: +async def test_response_truncated(http_tool: HttpGetTool, status_code: int) -> None: from ddev.ai.tools.core.truncation import MAX_CHARS large_body = "x" * (MAX_CHARS + 1000) with patch_httpx(fake_response(status_code, large_body)): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is True assert result.truncated is True diff --git a/ddev/tests/ai/tools/shell/test_base.py b/ddev/tests/ai/tools/shell/test_base.py index 5d7431239a5e7..3568170b9092d 100644 --- a/ddev/tests/ai/tools/shell/test_base.py +++ b/ddev/tests/ai/tools/shell/test_base.py @@ -79,42 +79,42 @@ def slow_greet_tool() -> SlowGreetTool: # --------------------------------------------------------------------------- -def 
test_run_command_success(proc): +async def test_run_command_success(proc): with patch_proc(proc): - result = asyncio.run(run_command(["echo", "hello"])) + result = await run_command(["echo", "hello"]) assert result.success is True assert result.data == "hello\n" assert result.truncated is False -def test_run_command_failure_combines_stdout_and_stderr(): +async def test_run_command_failure_combines_stdout_and_stderr(): proc = make_proc(returncode=1, stdout=b"partial\n", stderr=b"error\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is False assert "partial" in result.data assert "error" in result.data -def test_run_command_failure_stderr_only_when_no_stdout(): +async def test_run_command_failure_stderr_only_when_no_stdout(): proc = make_proc(returncode=1, stdout=b"", stderr=b"fatal error\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is False and result.data == "fatal error\n" -def test_run_command_ignores_stderr_on_zero_exit(): +async def test_run_command_ignores_stderr_on_zero_exit(): proc = make_proc(returncode=0, stdout=b"out\n", stderr=b"warning\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is True assert "warning" not in result.data -def test_run_command_stderr_included_when_stdout_empty_on_success(): +async def test_run_command_stderr_included_when_stdout_empty_on_success(): proc = make_proc(returncode=0, stdout=b"", stderr=b"info: initialized\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is True assert result.data == "info: initialized\n" @@ -127,10 +127,10 @@ def test_run_command_stderr_included_when_stdout_empty_on_success(): (1, b"", b""), ], ) -def test_run_command_empty_output(returncode, stdout, stderr): +async def 
test_run_command_empty_output(returncode, stdout, stderr): proc = make_proc(returncode=returncode, stdout=stdout, stderr=stderr) with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.data == "(no output)" @@ -139,27 +139,27 @@ def test_run_command_empty_output(returncode, stdout, stderr): # --------------------------------------------------------------------------- -def test_run_command_not_found(): +async def test_run_command_not_found(): with patch("asyncio.create_subprocess_exec", side_effect=FileNotFoundError()): - result = asyncio.run(run_command(["nonexistent"])) + result = await run_command(["nonexistent"]) assert result.success is False assert "Command not found" in result.error assert "nonexistent" in result.error -def test_run_command_timeout(): +async def test_run_command_timeout(): proc = make_proc() with patch_proc(proc): with patch("asyncio.wait_for", new=_raise_timeout): - result = asyncio.run(run_command(["sleep", "100"], timeout=5)) + result = await run_command(["sleep", "100"], timeout=5) assert result.success is False assert "5s" in result.error proc.kill.assert_called_once() -def test_run_command_unexpected_exception(): +async def test_run_command_unexpected_exception(): with patch("asyncio.create_subprocess_exec", side_effect=OSError("permission denied")): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is False assert "OSError" in result.error assert "permission denied" in result.error @@ -170,21 +170,21 @@ def test_run_command_unexpected_exception(): # --------------------------------------------------------------------------- -def test_run_command_truncation(): +async def test_run_command_truncation(): large = ("x" * 80 + "\n") * 700 proc = make_proc(stdout=large.encode()) with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.truncated is True assert 
result.total_size == len(large) assert result.shown_size == len(result.data) assert result.hint is not None -def test_run_command_no_truncation_at_limit(): +async def test_run_command_no_truncation_at_limit(): proc = make_proc(stdout=("x" * MAX_CHARS).encode()) with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.truncated is False assert result.total_size is None assert result.hint is None @@ -200,10 +200,10 @@ def test_cmd_tool_timeouts(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool assert SlowGreetTool.timeout == 60 # custom timeout -def test_cmd_tool_dispatches_with_correct_timeout(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool): +async def test_cmd_tool_dispatches_with_correct_timeout(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool): for tool, expected_timeout in [(greet_tool, 10), (slow_greet_tool, 60)]: with patch( "ddev.ai.tools.shell.base.run_command", new=AsyncMock(return_value=ToolResult(success=True)) ) as mock_run: - asyncio.run(tool.run({"name": "world"})) + await tool.run({"name": "world"}) mock_run.assert_called_once_with(["echo", "hello world"], timeout=expected_timeout) diff --git a/ddev/tests/ai/tools/shell/test_tools.py b/ddev/tests/ai/tools/shell/test_tools.py index 81fcb45d3d3b1..05084acc97e9e 100644 --- a/ddev/tests/ai/tools/shell/test_tools.py +++ b/ddev/tests/ai/tools/shell/test_tools.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import AsyncMock, patch import pytest @@ -66,12 +65,12 @@ def test_grep_cmd_pattern_and_path_placement(grep_tool: GrepTool): assert cmd[-1] == "/my dir/sub dir" -def test_grep_no_matches_returns_success(grep_tool: GrepTool): +async def test_grep_no_matches_returns_success(grep_tool: GrepTool): from ddev.ai.tools.core.types import ToolResult no_match_result = ToolResult(success=False, data="(no output)", error=None) with patch("ddev.ai.tools.shell.grep.run_command", new=AsyncMock(return_value=no_match_result)): - result = asyncio.run(grep_tool(GrepInput(pattern="nomatch", path="/tmp"))) + result = await grep_tool(GrepInput(pattern="nomatch", path="/tmp")) assert result.success is True assert result.data == "(no output)"