From 9e4d08b4188bf6eaffeb0dc1b0e2ab3bd3a75fbf Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Wed, 25 Mar 2026 16:03:00 +0100 Subject: [PATCH 1/5] Implement AnthropicAgent, defined types and created tests --- ddev/src/ddev/ai/agent/__init__.py | 3 + ddev/src/ddev/ai/agent/agent.py | 125 +++++++++ ddev/src/ddev/ai/agent/types.py | 73 ++++++ ddev/tests/ai/agent/__init__.py | 3 + ddev/tests/ai/agent/test_agent.py | 408 +++++++++++++++++++++++++++++ 5 files changed, 612 insertions(+) create mode 100644 ddev/src/ddev/ai/agent/__init__.py create mode 100644 ddev/src/ddev/ai/agent/agent.py create mode 100644 ddev/src/ddev/ai/agent/types.py create mode 100644 ddev/tests/ai/agent/__init__.py create mode 100644 ddev/tests/ai/agent/test_agent.py diff --git a/ddev/src/ddev/ai/agent/__init__.py b/ddev/src/ddev/ai/agent/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/src/ddev/ai/agent/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/src/ddev/ai/agent/agent.py b/ddev/src/ddev/ai/agent/agent.py new file mode 100644 index 0000000000000..4cbed20072f26 --- /dev/null +++ b/ddev/src/ddev/ai/agent/agent.py @@ -0,0 +1,125 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from typing import Final + +import anthropic +from anthropic.types import MessageParam, ToolResultBlockParam + +from ddev.ai.tools.core.registry import ToolRegistry + +from .types import ( + AgentAPIError, + AgentConnectionError, + AgentError, + AgentRateLimitError, + AgentResponse, + StopReason, + TokenUsage, + ToolCall, +) + +MODEL: Final[str] = "claude-opus-4-6" +MAX_TOKENS: Final[int] = 8192 + + +class AnthropicAgent: + def __init__( + self, + client: anthropic.AsyncAnthropic, + tools: ToolRegistry, + system_prompt: str, + name: str, + model: str = MODEL, + max_tokens: int = MAX_TOKENS, + ) -> None: + self._client = client + self._tools = tools + self._system_prompt = system_prompt + self.name = name + self._model = model + self._max_tokens = max_tokens + self._history: list[MessageParam] = [] + + @property + def history(self) -> list[MessageParam]: + """Read-only snapshot of the conversation history.""" + return list(self._history) + + def reset(self) -> None: + """Clear conversation history to start a new conversation.""" + self._history = [] + + def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list: + """Filter tool definitions by allowlist. None means all tools.""" + if allowed_tools is None: + return self._tools.definitions + allowed = set(allowed_tools) + return [d for d in self._tools.definitions if d["name"] in allowed] + + async def send( + self, + content: str | list[ToolResultBlockParam], + allowed_tools: list[str] | None = None, + ) -> AgentResponse: + tool_defs = self._get_tool_definitions(allowed_tools) + + user_msg: MessageParam = {"role": "user", "content": content} + messages = [*self._history, user_msg] + + try: + response = await self._client.messages.create( + model=self._model, + max_tokens=self._max_tokens, + system=self._system_prompt, + messages=messages, + tools=tool_defs if tool_defs else anthropic.NOT_GIVEN, + ) + except anthropic.APIConnectionError as e: + raise AgentConnectionError(f"Connection failed: {e}") from e + except anthropic.RateLimitError as e: + raise AgentRateLimitError(f"Rate limit exceeded: {e}") from e + except anthropic.APIStatusError as e: + raise AgentAPIError(e.status_code, e.message) from e + except anthropic.APIResponseValidationError as e: + raise AgentError(f"Response validation failed: {e}") from e + + # stop_reason is None only in streaming responses; we use non-streaming, so None is unexpected + if response.stop_reason is None: + raise AgentError("Received null stop_reason from API") + + try: + stop_reason = StopReason(response.stop_reason) + except ValueError: + raise AgentError(f"Unknown stop_reason: {response.stop_reason!r}") from None + + text_parts: list[str] = [] + tool_calls: list[ToolCall] = [] + + for block in response.content: + if isinstance(block, anthropic.types.TextBlock): + text_parts.append(block.text) + elif isinstance(block, anthropic.types.ToolUseBlock): + tool_calls.append(ToolCall(id=block.id, name=block.name, input=dict(block.input))) + # ThinkingBlock and RedactedThinkingBlock are intentionally ignored. + # Extended thinking support can add a `thinking: str` field to AgentResponse later. + + usage = TokenUsage( + input_tokens=response.usage.input_tokens, + output_tokens=response.usage.output_tokens, + cache_read_input_tokens=response.usage.cache_read_input_tokens or 0, + cache_creation_input_tokens=response.usage.cache_creation_input_tokens or 0, + ) + + agent_response = AgentResponse( + stop_reason=stop_reason, + text="".join(text_parts), + tool_calls=tool_calls, + usage=usage, + ) + + # Save to history only after a successful response. + self._history = [*messages, {"role": "assistant", "content": response.content}] + + return agent_response diff --git a/ddev/src/ddev/ai/agent/types.py b/ddev/src/ddev/ai/agent/types.py new file mode 100644 index 0000000000000..677ab92d25350 --- /dev/null +++ b/ddev/src/ddev/ai/agent/types.py @@ -0,0 +1,73 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from dataclasses import dataclass +from enum import StrEnum + +from pydantic import BaseModel + + +class StopReason(StrEnum): + """Maps Anthropic API stop_reason strings to a typed enum.""" + + END_TURN = "end_turn" + MAX_TOKENS = "max_tokens" + STOP_SEQUENCE = "stop_sequence" + TOOL_USE = "tool_use" + PAUSE_TURN = "pause_turn" + REFUSAL = "refusal" + + +@dataclass(frozen=True) +class ToolCall: + """A single tool invocation requested by the model.""" + + id: str + name: str + input: dict[str, object] + + +@dataclass(frozen=True) +class TokenUsage: + """Token accounting from a single API call.""" + + input_tokens: int + output_tokens: int + cache_read_input_tokens: int + cache_creation_input_tokens: int + + +class AgentResponse(BaseModel): + """The complete response from a single AnthropicAgent.send() call.""" + + stop_reason: StopReason + text: str + tool_calls: list[ToolCall] + usage: TokenUsage + + +class AgentError(Exception): + """Base class for all errors raised by AnthropicAgent.""" + + pass + + +class AgentConnectionError(AgentError): + """Network failure — the API was unreachable.""" + + pass + + +class AgentRateLimitError(AgentError): + """Rate limit hit — the request may be retried after a delay.""" + + pass + + +class AgentAPIError(AgentError): + """The API returned an error status code.""" + + def __init__(self, status_code: int, message: str) -> None: + super().__init__(message) + self.status_code = status_code diff --git a/ddev/tests/ai/agent/__init__.py b/ddev/tests/ai/agent/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/tests/ai/agent/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/ai/agent/test_agent.py b/ddev/tests/ai/agent/test_agent.py new file mode 100644 index 0000000000000..7fa04103b9328 --- /dev/null +++ b/ddev/tests/ai/agent/test_agent.py @@ -0,0 +1,408 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import asyncio +from collections.abc import Callable +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import anthropic +import pytest + +from ddev.ai.agent.agent import AnthropicAgent +from ddev.ai.agent.types import ( + AgentAPIError, + AgentConnectionError, + AgentError, + AgentRateLimitError, + StopReason, +) +from ddev.ai.tools.core.registry import ToolRegistry + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def make_usage( + input_tokens: int = 10, + output_tokens: int = 20, + cache_read: int | None = None, + cache_creation: int | None = None, +) -> SimpleNamespace: + return SimpleNamespace( + input_tokens=input_tokens, + output_tokens=output_tokens, + cache_read_input_tokens=cache_read, + cache_creation_input_tokens=cache_creation, + ) + + +def make_text_block(text: str) -> anthropic.types.TextBlock: + return anthropic.types.TextBlock(type="text", text=text) + + +def make_tool_use_block( + id: str = "toolu_01", + name: str = "read_file", + input: dict | None = None, +) -> anthropic.types.ToolUseBlock: + return anthropic.types.ToolUseBlock( + type="tool_use", + id=id, + name=name, + input=input or {"path": "/tmp/file.txt"}, + ) + + +def make_response( + stop_reason: str | None, + content: list, + usage: SimpleNamespace | None = None, +) -> SimpleNamespace: + return SimpleNamespace( + stop_reason=stop_reason, + content=content, + usage=usage or make_usage(), + ) + + +def make_agent( + tools: ToolRegistry | None = None, + mock_response: SimpleNamespace | None = None, +) -> tuple[AnthropicAgent, AsyncMock]: + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock(return_value=mock_response or make_response("end_turn", [])) + registry = tools or ToolRegistry([]) + agent = AnthropicAgent( + client=client, + tools=registry, + system_prompt="You are helpful.", + name="test-agent", + ) + return agent, client.messages.create + + +# --------------------------------------------------------------------------- +# end_turn with a single TextBlock +# --------------------------------------------------------------------------- + + +def test_end_turn_single_text_block() -> None: + content = [make_text_block("Hello!")] + resp = make_response("end_turn", content) + agent, _ = make_agent(mock_response=resp) + + result = asyncio.run(agent.send("Hi")) + + assert result.stop_reason is StopReason.END_TURN + assert result.text == "Hello!" + assert result.tool_calls == [] + assert len(agent.history) == 2 + assert agent.history[0] == {"role": "user", "content": "Hi"} + assert agent.history[1] == {"role": "assistant", "content": content} + + +# --------------------------------------------------------------------------- +# tool_use +# --------------------------------------------------------------------------- + + +def test_tool_use_single_block() -> None: + block = make_tool_use_block(id="toolu_42", name="read_file", input={"path": "/etc/hosts"}) + resp = make_response("tool_use", [block]) + agent, _ = make_agent(mock_response=resp) + + result = asyncio.run(agent.send("Read hosts")) + + assert result.stop_reason is StopReason.TOOL_USE + assert len(result.tool_calls) == 1 + tc = result.tool_calls[0] + assert tc.id == "toolu_42" + assert tc.name == "read_file" + assert tc.input == {"path": "/etc/hosts"} + + +# --------------------------------------------------------------------------- +# mixed TextBlock + ToolUseBlock +# --------------------------------------------------------------------------- + + +def test_mixed_text_and_tool_use() -> None: + content = [ + make_text_block("I'll read the file for you."), + make_tool_use_block(id="toolu_01", name="read_file"), + ] + resp = make_response("tool_use", content) + agent, _ = make_agent(mock_response=resp) + + result = asyncio.run(agent.send("Read a file")) + + assert result.text == "I'll read the file for you." + assert len(result.tool_calls) == 1 + + +# --------------------------------------------------------------------------- +# Multiple TextBlocks are concatenated +# --------------------------------------------------------------------------- + + +def test_multiple_text_blocks_are_concatenated() -> None: + content = [make_text_block("Hello, "), make_text_block("world!")] + resp = make_response("end_turn", content) + agent, _ = make_agent(mock_response=resp) + + result = asyncio.run(agent.send("Hi")) + + assert result.text == "Hello, world!" + + +# --------------------------------------------------------------------------- +# max_tokens is a normal response (not an error) +# --------------------------------------------------------------------------- + + +def test_max_tokens_is_not_an_error() -> None: + resp = make_response("max_tokens", [make_text_block("Truncated...")]) + agent, _ = make_agent(mock_response=resp) + + result = asyncio.run(agent.send("Tell me everything")) + + assert result.stop_reason is StopReason.MAX_TOKENS + assert len(agent.history) == 2 + + +# --------------------------------------------------------------------------- +# allowed_tools filtering +# --------------------------------------------------------------------------- + + +class FakeTool: + def __init__(self, name: str) -> None: + self._name = name + + @property + def name(self) -> str: + return self._name + + @property + def description(self) -> str: + return "" + + @property + def definition(self) -> dict: + return {"name": self._name, "description": "", "input_schema": {}} + + async def run(self, raw: dict) -> None: + pass + + +@pytest.mark.parametrize( + ("tool_names", "allowed_tools", "expected_names"), + [ + (["read_file", "grep", "mkdir"], ["read_file"], ["read_file"]), + (["a", "b"], None, ["a", "b"]), + ], +) +def test_allowed_tools( + tool_names: list[str], + allowed_tools: list[str] | None, + expected_names: list[str], +) -> None: + registry = ToolRegistry([FakeTool(n) for n in tool_names]) + resp = make_response("end_turn", [make_text_block("ok")]) + agent, create_mock = make_agent(tools=registry, mock_response=resp) + + asyncio.run(agent.send("Hi", allowed_tools=allowed_tools)) + + sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]] + assert sent_names == expected_names + + +@pytest.mark.parametrize("allowed_tools", [[], ["nonexistent_tool"]]) +def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, create_mock = make_agent(mock_response=resp) + + asyncio.run(agent.send("Hi", allowed_tools=allowed_tools)) + + assert create_mock.call_args.kwargs["tools"] is anthropic.NOT_GIVEN + + +# --------------------------------------------------------------------------- +# API errors map to the correct AgentError subclass +# --------------------------------------------------------------------------- + +_mock_500 = MagicMock() +_mock_500.status_code = 500 + + +@pytest.mark.parametrize( + "side_effect,expected_exc,extra_check", + [ + ( + anthropic.APIConnectionError(request=MagicMock()), + AgentConnectionError, + lambda e: "Connection failed" in str(e), + ), + ( + anthropic.RateLimitError( + message="rate limit", + response=MagicMock(status_code=429, headers={}), + body=None, + ), + AgentRateLimitError, + lambda e: "Rate limit exceeded" in str(e), + ), + ( + anthropic.APIStatusError( + message="internal server error", + response=_mock_500, + body=None, + ), + AgentAPIError, + lambda e: e.status_code == 500, + ), + ( + anthropic.APIResponseValidationError( + response=MagicMock(), + body=None, + ), + AgentError, + lambda e: "Response validation failed" in str(e), + ), + ], +) +def test_api_errors_map_correctly( + side_effect: Exception, + expected_exc: type[AgentError], + extra_check: Callable[[AgentError], bool], +) -> None: + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock(side_effect=side_effect) + agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + with pytest.raises(expected_exc) as exc_info: + asyncio.run(agent.send("Hi")) + + assert extra_check(exc_info.value) + assert agent.history == [] + + +# --------------------------------------------------------------------------- +# Unknown stop_reason raises AgentError, history unchanged +# --------------------------------------------------------------------------- + + +def test_unknown_stop_reason_raises_agent_error() -> None: + resp = make_response("totally_unknown_reason", []) + agent, _ = make_agent(mock_response=resp) + + with pytest.raises(AgentError) as exc_info: + asyncio.run(agent.send("Hi")) + + assert agent.history == [] + assert "Unknown stop_reason" in str(exc_info.value) + assert "totally_unknown_reason" in str(exc_info.value) + + +# --------------------------------------------------------------------------- +# cache_read_input_tokens=None defaults to 0 +# --------------------------------------------------------------------------- + + +def test_cache_tokens_none_defaults_to_zero() -> None: + usage = make_usage(cache_read=None, cache_creation=None) + resp = make_response("end_turn", [make_text_block("ok")], usage=usage) + agent, _ = make_agent(mock_response=resp) + + result = asyncio.run(agent.send("Hi")) + + assert result.usage.cache_read_input_tokens == 0 + assert result.usage.cache_creation_input_tokens == 0 + + +# --------------------------------------------------------------------------- +# Multi-turn — send str then send tool results → history has 4 entries +# --------------------------------------------------------------------------- + + +def test_multi_turn_history_grows_correctly() -> None: + tool_resp = make_response("tool_use", [make_tool_use_block(id="toolu_01")]) + text_resp = make_response("end_turn", [make_text_block("Done.")]) + + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock(side_effect=[tool_resp, text_resp]) + agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + first = asyncio.run(agent.send("Do X")) + assert first.stop_reason is StopReason.TOOL_USE + assert len(agent.history) == 2 + + tool_results = [{"type": "tool_result", "tool_use_id": "toolu_01", "content": "result"}] + second = asyncio.run(agent.send(tool_results)) + assert second.stop_reason is StopReason.END_TURN + assert len(agent.history) == 4 + assert agent.history[2]["role"] == "user" + assert agent.history[3]["role"] == "assistant" + + +# --------------------------------------------------------------------------- +# history property returns a copy +# --------------------------------------------------------------------------- + + +def test_history_property_returns_copy() -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, _ = make_agent(mock_response=resp) + asyncio.run(agent.send("Hi")) + + snapshot = agent.history + snapshot.clear() + + assert len(agent.history) == 2 + + +# --------------------------------------------------------------------------- +# reset() clears history +# --------------------------------------------------------------------------- + + +def test_reset_clears_history() -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, _ = make_agent(mock_response=resp) + asyncio.run(agent.send("Hi")) + assert len(agent.history) == 2 + + agent.reset() + assert agent.history == [] + + +# --------------------------------------------------------------------------- +# Error mid-conversation leaves history unchanged +# --------------------------------------------------------------------------- + + +def test_error_mid_conversation_leaves_history_unchanged() -> None: + ok_resp = make_response("end_turn", [make_text_block("ok")]) + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock( + side_effect=[ + ok_resp, + anthropic.APIConnectionError(request=MagicMock()), + ] + ) + agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + asyncio.run(agent.send("First message")) + history_after_first = agent.history[:] + + with pytest.raises(AgentConnectionError): + asyncio.run(agent.send("Second message")) + + assert agent.history == history_after_first From 953d3d2d200baf8af340be3b5dacd343e2301e6a Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Wed, 25 Mar 2026 16:58:57 +0100 Subject: [PATCH 2/5] Fix some bugs and improved tests --- ddev/src/ddev/ai/agent/agent.py | 8 +- ddev/src/ddev/ai/agent/types.py | 5 +- ddev/tests/ai/agent/test_agent.py | 131 +++++++++++++++--------------- 3 files changed, 73 insertions(+), 71 deletions(-) diff --git a/ddev/src/ddev/ai/agent/agent.py b/ddev/src/ddev/ai/agent/agent.py index 4cbed20072f26..7659f7fbfe474 100644 --- a/ddev/src/ddev/ai/agent/agent.py +++ b/ddev/src/ddev/ai/agent/agent.py @@ -5,7 +5,7 @@ from typing import Final import anthropic -from anthropic.types import MessageParam, ToolResultBlockParam +from anthropic.types import MessageParam, ToolParam, ToolResultBlockParam from ddev.ai.tools.core.registry import ToolRegistry @@ -51,7 +51,7 @@ def reset(self) -> None: """Clear conversation history to start a new conversation.""" self._history = [] - def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list: + def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list[ToolParam]: """Filter tool definitions by allowlist. None means all tools.""" if allowed_tools is None: return self._tools.definitions @@ -91,8 +91,8 @@ async def send( try: stop_reason = StopReason(response.stop_reason) - except ValueError: - raise AgentError(f"Unknown stop_reason: {response.stop_reason!r}") from None + except ValueError as e: + raise AgentError(f"Unknown stop_reason: {response.stop_reason!r}") from e text_parts: list[str] = [] tool_calls: list[ToolCall] = [] diff --git a/ddev/src/ddev/ai/agent/types.py b/ddev/src/ddev/ai/agent/types.py index 677ab92d25350..1c4b701b84ddb 100644 --- a/ddev/src/ddev/ai/agent/types.py +++ b/ddev/src/ddev/ai/agent/types.py @@ -5,8 +5,6 @@ from dataclasses import dataclass from enum import StrEnum -from pydantic import BaseModel - class StopReason(StrEnum): """Maps Anthropic API stop_reason strings to a typed enum.""" @@ -38,7 +36,8 @@ class TokenUsage: cache_creation_input_tokens: int -class AgentResponse(BaseModel): +@dataclass(frozen=True) +class AgentResponse: """The complete response from a single AnthropicAgent.send() call.""" stop_reason: StopReason diff --git a/ddev/tests/ai/agent/test_agent.py b/ddev/tests/ai/agent/test_agent.py index 7fa04103b9328..8372e2d00e519 100644 --- a/ddev/tests/ai/agent/test_agent.py +++ b/ddev/tests/ai/agent/test_agent.py @@ -3,7 +3,6 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import asyncio -from collections.abc import Callable from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock @@ -19,6 +18,7 @@ StopReason, ) from ddev.ai.tools.core.registry import ToolRegistry +from ddev.ai.tools.core.types import ToolResult # --------------------------------------------------------------------------- # Helpers @@ -195,30 +195,30 @@ def description(self) -> str: def definition(self) -> dict: return {"name": self._name, "description": "", "input_schema": {}} - async def run(self, raw: dict) -> None: + async def run(self, raw: dict) -> ToolResult: pass -@pytest.mark.parametrize( - ("tool_names", "allowed_tools", "expected_names"), - [ - (["read_file", "grep", "mkdir"], ["read_file"], ["read_file"]), - (["a", "b"], None, ["a", "b"]), - ], -) -def test_allowed_tools( - tool_names: list[str], - allowed_tools: list[str] | None, - expected_names: list[str], -) -> None: - registry = ToolRegistry([FakeTool(n) for n in tool_names]) +def test_allowed_tools_filters_to_subset() -> None: + registry = ToolRegistry([FakeTool(n) for n in ["read_file", "grep", "mkdir"]]) resp = make_response("end_turn", [make_text_block("ok")]) agent, create_mock = make_agent(tools=registry, mock_response=resp) - asyncio.run(agent.send("Hi", allowed_tools=allowed_tools)) + asyncio.run(agent.send("Hi", allowed_tools=["read_file"])) + + sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]] + assert sent_names == ["read_file"] + + +def test_allowed_tools_none_passes_all() -> None: + registry = ToolRegistry([FakeTool(n) for n in ["a", "b"]]) + resp = make_response("end_turn", [make_text_block("ok")]) + agent, create_mock = make_agent(tools=registry, mock_response=resp) + + asyncio.run(agent.send("Hi", allowed_tools=None)) sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]] - assert sent_names == expected_names + assert sent_names == ["a", "b"] @pytest.mark.parametrize("allowed_tools", [[], ["nonexistent_tool"]]) @@ -235,60 +235,63 @@ def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None: # API errors map to the correct AgentError subclass # --------------------------------------------------------------------------- -_mock_500 = MagicMock() -_mock_500.status_code = 500 - -@pytest.mark.parametrize( - "side_effect,expected_exc,extra_check", - [ - ( - anthropic.APIConnectionError(request=MagicMock()), - AgentConnectionError, - lambda e: "Connection failed" in str(e), - ), - ( - anthropic.RateLimitError( - message="rate limit", - response=MagicMock(status_code=429, headers={}), - body=None, - ), - AgentRateLimitError, - lambda e: "Rate limit exceeded" in str(e), - ), - ( - anthropic.APIStatusError( - message="internal server error", - response=_mock_500, - body=None, - ), - AgentAPIError, - lambda e: e.status_code == 500, - ), - ( - anthropic.APIResponseValidationError( - response=MagicMock(), - body=None, - ), - AgentError, - lambda e: "Response validation failed" in str(e), - ), - ], -) -def test_api_errors_map_correctly( - side_effect: Exception, - expected_exc: type[AgentError], - extra_check: Callable[[AgentError], bool], -) -> None: +def _make_error_agent(side_effect: Exception) -> AnthropicAgent: client = MagicMock(spec=anthropic.AsyncAnthropic) client.messages = MagicMock() client.messages.create = AsyncMock(side_effect=side_effect) - agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + return AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + +def test_connection_error_maps_to_agent_connection_error() -> None: + agent = _make_error_agent(anthropic.APIConnectionError(request=MagicMock())) + + with pytest.raises(AgentConnectionError) as exc_info: + asyncio.run(agent.send("Hi")) + + assert "Connection failed" in str(exc_info.value) + assert agent.history == [] + + +def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None: + agent = _make_error_agent( + anthropic.RateLimitError( + message="rate limit", + response=MagicMock(status_code=429, headers={}), + body=None, + ) + ) + + with pytest.raises(AgentRateLimitError) as exc_info: + asyncio.run(agent.send("Hi")) + + assert "Rate limit exceeded" in str(exc_info.value) + assert agent.history == [] + - with pytest.raises(expected_exc) as exc_info: +def test_api_status_error_maps_to_agent_api_error() -> None: + agent = _make_error_agent( + anthropic.APIStatusError( + message="internal server error", + response=MagicMock(status_code=500), + body=None, + ) + ) + + with pytest.raises(AgentAPIError) as exc_info: + asyncio.run(agent.send("Hi")) + + assert exc_info.value.status_code == 500 + assert agent.history == [] + + +def test_response_validation_error_maps_to_agent_error() -> None: + agent = _make_error_agent(anthropic.APIResponseValidationError(response=MagicMock(), body=None)) + + with pytest.raises(AgentError) as exc_info: asyncio.run(agent.send("Hi")) - assert extra_check(exc_info.value) + assert "Response validation failed" in str(exc_info.value) assert agent.history == [] From 57ed67d14b2d0492704a834b33c0e8bd80a6e8d0 Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Wed, 25 Mar 2026 18:09:21 +0100 Subject: [PATCH 3/5] Rename agents to client and added \n btw TextBlocks --- .../src/ddev/ai/agent/{agent.py => client.py} | 57 ++++++++++++--- ddev/src/ddev/ai/agent/exceptions.py | 29 ++++++++ ddev/src/ddev/ai/agent/types.py | 72 ------------------- ddev/tests/ai/agent/test_agent.py | 7 +- 4 files changed, 80 insertions(+), 85 deletions(-) rename ddev/src/ddev/ai/agent/{agent.py => client.py} (77%) create mode 100644 ddev/src/ddev/ai/agent/exceptions.py delete mode 100644 ddev/src/ddev/ai/agent/types.py diff --git a/ddev/src/ddev/ai/agent/agent.py b/ddev/src/ddev/ai/agent/client.py similarity index 77% rename from ddev/src/ddev/ai/agent/agent.py rename to ddev/src/ddev/ai/agent/client.py index 7659f7fbfe474..b0b3f8e874c05 100644 --- a/ddev/src/ddev/ai/agent/agent.py +++ b/ddev/src/ddev/ai/agent/client.py @@ -2,6 +2,9 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from copy import deepcopy +from dataclasses import dataclass +from enum import StrEnum from typing import Final import anthropic @@ -9,21 +12,57 @@ from ddev.ai.tools.core.registry import ToolRegistry -from .types import ( +from .exceptions import ( AgentAPIError, AgentConnectionError, AgentError, AgentRateLimitError, - AgentResponse, - StopReason, - TokenUsage, - ToolCall, ) -MODEL: Final[str] = "claude-opus-4-6" +MODEL: Final[str] = "claude-sonnet-4-6" MAX_TOKENS: Final[int] = 8192 +class StopReason(StrEnum): + """Maps Anthropic API stop_reason strings to a typed enum.""" + + END_TURN = "end_turn" + MAX_TOKENS = "max_tokens" + STOP_SEQUENCE = "stop_sequence" + TOOL_USE = "tool_use" + PAUSE_TURN = "pause_turn" + REFUSAL = "refusal" + + +@dataclass(frozen=True) +class ToolCall: + """A single tool invocation requested by the model.""" + + id: str + name: str + input: dict[str, object] + + +@dataclass(frozen=True) +class TokenUsage: + """Token accounting from a single API call.""" + + input_tokens: int + output_tokens: int + cache_read_input_tokens: int + cache_creation_input_tokens: int + + +@dataclass(frozen=True) +class AgentResponse: + """The complete response from a single AnthropicAgent.send() call.""" + + stop_reason: StopReason + text: str + tool_calls: list[ToolCall] + usage: TokenUsage + + class AnthropicAgent: def __init__( self, @@ -45,7 +84,7 @@ def __init__( @property def history(self) -> list[MessageParam]: """Read-only snapshot of the conversation history.""" - return list(self._history) + return deepcopy(self._history) def reset(self) -> None: """Clear conversation history to start a new conversation.""" @@ -114,12 +153,12 @@ async def send( agent_response = AgentResponse( stop_reason=stop_reason, - text="".join(text_parts), + text="\n".join(text_parts), tool_calls=tool_calls, usage=usage, ) # Save to history only after a successful response. - self._history = [*messages, {"role": "assistant", "content": response.content}] + self._history.extend([user_msg, {"role": "assistant", "content": response.content}]) return agent_response diff --git a/ddev/src/ddev/ai/agent/exceptions.py b/ddev/src/ddev/ai/agent/exceptions.py new file mode 100644 index 0000000000000..d0d25d3665239 --- /dev/null +++ b/ddev/src/ddev/ai/agent/exceptions.py @@ -0,0 +1,29 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + + +class AgentError(Exception): + """Base class for all errors raised by AnthropicAgent.""" + + pass + + +class AgentConnectionError(AgentError): + """Network failure — the API was unreachable.""" + + pass + + +class AgentRateLimitError(AgentError): + """Rate limit hit — the request may be retried after a delay.""" + + pass + + +class AgentAPIError(AgentError): + """The API returned an error status code.""" + + def __init__(self, status_code: int, message: str) -> None: + super().__init__(message) + self.status_code = status_code diff --git a/ddev/src/ddev/ai/agent/types.py b/ddev/src/ddev/ai/agent/types.py deleted file mode 100644 index 1c4b701b84ddb..0000000000000 --- a/ddev/src/ddev/ai/agent/types.py +++ /dev/null @@ -1,72 +0,0 @@ -# (C) Datadog, Inc. 2026-present -# All rights reserved -# Licensed under a 3-clause BSD style license (see LICENSE) - -from dataclasses import dataclass -from enum import StrEnum - - -class StopReason(StrEnum): - """Maps Anthropic API stop_reason strings to a typed enum.""" - - END_TURN = "end_turn" - MAX_TOKENS = "max_tokens" - STOP_SEQUENCE = "stop_sequence" - TOOL_USE = "tool_use" - PAUSE_TURN = "pause_turn" - REFUSAL = "refusal" - - -@dataclass(frozen=True) -class ToolCall: - """A single tool invocation requested by the model.""" - - id: str - name: str - input: dict[str, object] - - -@dataclass(frozen=True) -class TokenUsage: - """Token accounting from a single API call.""" - - input_tokens: int - output_tokens: int - cache_read_input_tokens: int - cache_creation_input_tokens: int - - -@dataclass(frozen=True) -class AgentResponse: - """The complete response from a single AnthropicAgent.send() call.""" - - stop_reason: StopReason - text: str - tool_calls: list[ToolCall] - usage: TokenUsage - - -class AgentError(Exception): - """Base class for all errors raised by AnthropicAgent.""" - - pass - - -class AgentConnectionError(AgentError): - """Network failure — the API was unreachable.""" - - pass - - -class AgentRateLimitError(AgentError): - """Rate limit hit — the request may be retried after a delay.""" - - pass - - -class AgentAPIError(AgentError): - """The API returned an error status code.""" - - def __init__(self, status_code: int, message: str) -> None: - super().__init__(message) - self.status_code = status_code diff --git a/ddev/tests/ai/agent/test_agent.py b/ddev/tests/ai/agent/test_agent.py index 8372e2d00e519..0a261baf02679 100644 --- a/ddev/tests/ai/agent/test_agent.py +++ b/ddev/tests/ai/agent/test_agent.py @@ -9,13 +9,12 @@ import anthropic import pytest -from ddev.ai.agent.agent import AnthropicAgent -from ddev.ai.agent.types import ( +from ddev.ai.agent.client import AnthropicAgent, StopReason +from ddev.ai.agent.exceptions import ( AgentAPIError, AgentConnectionError, AgentError, AgentRateLimitError, - StopReason, ) from ddev.ai.tools.core.registry import ToolRegistry from ddev.ai.tools.core.types import ToolResult @@ -156,7 +155,7 @@ def test_multiple_text_blocks_are_concatenated() -> None: result = asyncio.run(agent.send("Hi")) - assert result.text == "Hello, world!" + assert result.text == "Hello, \nworld!" # --------------------------------------------------------------------------- From c9b34475136defa4b2f2116912782b3d5786e366 Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Thu, 26 Mar 2026 16:00:21 +0100 Subject: [PATCH 4/5] Add ContextUsage and modify tools' allowed_callers --- ddev/pyproject.toml | 2 +- ddev/src/ddev/ai/agent/client.py | 56 +++++++++++++++---- ddev/src/ddev/ai/tools/core/registry.py | 9 +-- .../agent/{test_agent.py => test_client.py} | 44 +++++++++++++++ ddev/tests/ai/tools/core/test_registry.py | 4 +- 5 files changed, 93 insertions(+), 22 deletions(-) rename ddev/tests/ai/agent/{test_agent.py => test_client.py} (88%) diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml index 8cfbc07271cf7..3118006baa38d 100644 --- a/ddev/pyproject.toml +++ b/ddev/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "anthropic>=0.18.0", + "anthropic>=0.86.0", "click~=8.1.6", "coverage", "datadog-api-client==2.20.0", diff --git a/ddev/src/ddev/ai/agent/client.py b/ddev/src/ddev/ai/agent/client.py index b0b3f8e874c05..6c1c6d44f70af 100644 --- a/ddev/src/ddev/ai/agent/client.py +++ b/ddev/src/ddev/ai/agent/client.py @@ -20,7 +20,8 @@ ) MODEL: Final[str] = "claude-sonnet-4-6" -MAX_TOKENS: Final[int] = 8192 +MAX_TOKENS: Final[int] = 8192 # max tokens per response +ALLOWED_TOOL_CALLERS: Final = ["code_execution_20260120"] class StopReason(StrEnum): @@ -43,14 +44,31 @@ class ToolCall: input: dict[str, object] +@dataclass(frozen=True) +class ContextUsage: + """Context window accounting for a single API call.""" + + window_size: int + used_tokens: int + + @property + def context_pct(self) -> float: + return self.used_tokens / self.window_size * 100 + + @property + def remaining_tokens(self) -> int: + return self.window_size - self.used_tokens + + @dataclass(frozen=True) class TokenUsage: """Token accounting from a single API call.""" - input_tokens: int - output_tokens: int - cache_read_input_tokens: int - cache_creation_input_tokens: int + input_tokens: int # tokens sent to the model (system_prompt + history) + output_tokens: int # tokens the model generated + cache_read_input_tokens: int # tokens read from prompt cache + cache_creation_input_tokens: int # tokens written to prompt cache + context: ContextUsage @dataclass(frozen=True) @@ -72,6 +90,7 @@ def __init__( name: str, model: str = MODEL, max_tokens: int = MAX_TOKENS, + tool_execution: bool = False, ) -> None: self._client = client self._tools = tools @@ -79,7 +98,9 @@ def __init__( self.name = name self._model = model self._max_tokens = max_tokens + self._tool_execution = tool_execution self._history: list[MessageParam] = [] + self._context_window: int | None = None @property def history(self) -> list[MessageParam]: @@ -90,12 +111,21 @@ def reset(self) -> None: """Clear conversation history to start a new conversation.""" self._history = [] + async def _get_context_window(self) -> int: + if self._context_window is None: + info = await self._client.models.retrieve(self._model) + self._context_window = info.max_input_tokens + return self._context_window + def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list[ToolParam]: """Filter tool definitions by allowlist. None means all tools.""" - if allowed_tools is None: - return self._tools.definitions - allowed = set(allowed_tools) - return [d for d in self._tools.definitions if d["name"] in allowed] + definitions = self._tools.definitions + if allowed_tools is not None: + allowed = set(allowed_tools) + definitions = [d for d in definitions if d["name"] in allowed] + if not self._tool_execution: + definitions = [{**d, "allowed_callers": ALLOWED_TOOL_CALLERS} for d in definitions] + return definitions async def send( self, @@ -144,11 +174,15 @@ async def send( # ThinkingBlock and RedactedThinkingBlock are intentionally ignored. # Extended thinking support can add a `thinking: str` field to AgentResponse later. + cache_read = response.usage.cache_read_input_tokens or 0 + cache_creation = response.usage.cache_creation_input_tokens or 0 + used_tokens = response.usage.input_tokens + cache_read + cache_creation usage = TokenUsage( input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens, - cache_read_input_tokens=response.usage.cache_read_input_tokens or 0, - cache_creation_input_tokens=response.usage.cache_creation_input_tokens or 0, + cache_read_input_tokens=cache_read, + cache_creation_input_tokens=cache_creation, + context=ContextUsage(window_size=await self._get_context_window(), used_tokens=used_tokens), ) agent_response = AgentResponse( diff --git a/ddev/src/ddev/ai/tools/core/registry.py b/ddev/src/ddev/ai/tools/core/registry.py index 29c6f92fb8801..240e969a81843 100644 --- a/ddev/src/ddev/ai/tools/core/registry.py +++ b/ddev/src/ddev/ai/tools/core/registry.py @@ -2,15 +2,11 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from typing import Final - from anthropic.types import ToolParam from .protocol import ToolProtocol from .types import ToolResult -ALLOWED_TOOL_CALLERS: Final = ["code_execution_20260120"] - class ToolRegistry: """Registry holding all available tools.""" @@ -20,9 +16,8 @@ def __init__(self, tools: list[ToolProtocol]) -> None: @property def definitions(self) -> list[ToolParam]: - """Return Anthropic SDK tool definitions for all registered tools. - Each tool definition dict is not mutated, but a new dict is returned with the allowed_callers key added.""" - return [{**tool.definition, "allowed_callers": ALLOWED_TOOL_CALLERS} for tool in self._tools.values()] + """Return Anthropic SDK tool definitions for all registered tools.""" + return [tool.definition for tool in self._tools.values()] async def run(self, name: str, raw: dict[str, object]) -> ToolResult: """Execute a tool by name, returning an error result if not found.""" diff --git a/ddev/tests/ai/agent/test_agent.py b/ddev/tests/ai/agent/test_client.py similarity index 88% rename from ddev/tests/ai/agent/test_agent.py rename to ddev/tests/ai/agent/test_client.py index 0a261baf02679..e67a6ee8d0785 100644 --- a/ddev/tests/ai/agent/test_agent.py +++ b/ddev/tests/ai/agent/test_client.py @@ -67,6 +67,9 @@ def make_response( ) +FAKE_CONTEXT_WINDOW = 200_000 + + def make_agent( tools: ToolRegistry | None = None, mock_response: SimpleNamespace | None = None, @@ -74,6 +77,8 @@ def make_agent( client = MagicMock(spec=anthropic.AsyncAnthropic) client.messages = MagicMock() client.messages.create = AsyncMock(return_value=mock_response or make_response("end_turn", [])) + client.models = MagicMock() + client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) registry = tools or ToolRegistry([]) agent = AnthropicAgent( client=client, @@ -327,6 +332,41 @@ def test_cache_tokens_none_defaults_to_zero() -> None: assert result.usage.cache_creation_input_tokens == 0 +# --------------------------------------------------------------------------- +# ContextUsage fields +# --------------------------------------------------------------------------- + + +def test_context_usage_fields() -> None: + usage = make_usage(input_tokens=1000, cache_read=500, cache_creation=200) + resp = make_response("end_turn", [make_text_block("ok")], usage=usage) + agent, _ = make_agent(mock_response=resp) + + result = asyncio.run(agent.send("Hi")) + + ctx = result.usage.context + assert ctx.window_size == FAKE_CONTEXT_WINDOW + assert ctx.used_tokens == 1700 # 1000 + 500 + 200 + assert ctx.context_pct == pytest.approx(1700 / FAKE_CONTEXT_WINDOW * 100) + assert ctx.remaining_tokens == FAKE_CONTEXT_WINDOW - 1700 + + +# --------------------------------------------------------------------------- +# context_window is fetched once and cached across multiple sends +# --------------------------------------------------------------------------- + + +def test_context_window_fetched_once() -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, _ = make_agent(mock_response=resp) + agent._client.messages.create = AsyncMock(return_value=resp) + + asyncio.run(agent.send("First")) + asyncio.run(agent.send("Second")) + + agent._client.models.retrieve.assert_awaited_once() + + # --------------------------------------------------------------------------- # Multi-turn — send str then send tool results → history has 4 entries # --------------------------------------------------------------------------- @@ -339,6 +379,8 @@ def test_multi_turn_history_grows_correctly() -> None: client = MagicMock(spec=anthropic.AsyncAnthropic) client.messages = MagicMock() client.messages.create = AsyncMock(side_effect=[tool_resp, text_resp]) + client.models = MagicMock() + client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") first = asyncio.run(agent.send("Do X")) @@ -399,6 +441,8 @@ def test_error_mid_conversation_leaves_history_unchanged() -> None: anthropic.APIConnectionError(request=MagicMock()), ] ) + client.models = MagicMock() + client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") asyncio.run(agent.send("First message")) diff --git a/ddev/tests/ai/tools/core/test_registry.py b/ddev/tests/ai/tools/core/test_registry.py index fdd42714b6ed4..245b3f67490d1 100644 --- a/ddev/tests/ai/tools/core/test_registry.py +++ b/ddev/tests/ai/tools/core/test_registry.py @@ -5,7 +5,7 @@ import pytest -from ddev.ai.tools.core.registry import ALLOWED_TOOL_CALLERS, ToolRegistry +from ddev.ai.tools.core.registry import ToolRegistry from ddev.ai.tools.core.types import ToolResult # --------------------------------------------------------------------------- @@ -76,8 +76,6 @@ def test_empty_registry_returns_empty_list(): def test_tool_registry_definitions_returns_all_tool_definitions(): registry = ToolRegistry([FakeTool("a"), FakeTool("b")]) assert len(registry.definitions) == 2 - for defn in registry.definitions: - assert defn["allowed_callers"] == ALLOWED_TOOL_CALLERS def test_definition_contains_tool_name(): From 80fb3d2150e36ea220010397759c823b1ae97222 Mon Sep 17 00:00:00 2001 From: Luis Orofino Date: Thu, 26 Mar 2026 17:29:07 +0100 Subject: [PATCH 5/5] Add docstrings and pytest-asyncio --- ddev/hatch.toml | 1 + ddev/pyproject.toml | 3 + ddev/src/ddev/ai/agent/client.py | 41 +++++++--- ddev/tests/ai/agent/test_client.py | 87 +++++++++++----------- ddev/tests/ai/tools/core/test_base.py | 13 ++-- ddev/tests/ai/tools/core/test_registry.py | 21 +++--- ddev/tests/ai/tools/fs/conftest.py | 5 +- ddev/tests/ai/tools/fs/test_append_file.py | 29 ++++---- ddev/tests/ai/tools/fs/test_create_file.py | 29 ++++---- ddev/tests/ai/tools/fs/test_edit_file.py | 43 +++++------ ddev/tests/ai/tools/fs/test_read_file.py | 33 ++++---- ddev/tests/ai/tools/fs/test_workflow.py | 21 +++--- ddev/tests/ai/tools/http/test_http_get.py | 25 +++---- ddev/tests/ai/tools/shell/test_base.py | 48 ++++++------ ddev/tests/ai/tools/shell/test_tools.py | 5 +- 15 files changed, 211 insertions(+), 193 deletions(-) diff --git a/ddev/hatch.toml b/ddev/hatch.toml index 8a07ebbf5c149..a5fa11668e95f 100644 --- a/ddev/hatch.toml +++ b/ddev/hatch.toml @@ -9,6 +9,7 @@ python = "3.13" e2e-env = false dependencies = [ "pyyaml", + "pytest-asyncio", "vcrpy", ] # TODO: remove this when the old CLI is gone diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml index 3118006baa38d..33af6ea6cbbeb 100644 --- a/ddev/pyproject.toml +++ b/ddev/pyproject.toml @@ -136,3 +136,6 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] #Tests can use assertions and relative imports "**/tests/**/*" = ["I252"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" diff --git a/ddev/src/ddev/ai/agent/client.py b/ddev/src/ddev/ai/agent/client.py index 6c1c6d44f70af..d576429015cef 100644 --- a/ddev/src/ddev/ai/agent/client.py +++ b/ddev/src/ddev/ai/agent/client.py @@ -5,7 +5,7 @@ from copy import deepcopy from dataclasses import dataclass from enum import StrEnum -from typing import Final +from typing import Any, Final import anthropic from anthropic.types import MessageParam, ToolParam, ToolResultBlockParam @@ -19,8 +19,8 @@ AgentRateLimitError, ) -MODEL: Final[str] = "claude-sonnet-4-6" -MAX_TOKENS: Final[int] = 8192 # max tokens per response +DEFAULT_MODEL: Final[str] = "claude-sonnet-4-6" +DEFAULT_MAX_TOKENS: Final[int] = 8192 # max tokens per response ALLOWED_TOOL_CALLERS: Final = ["code_execution_20260120"] @@ -41,7 +41,7 @@ class ToolCall: id: str name: str - input: dict[str, object] + input: dict[str, Any] @dataclass(frozen=True) @@ -73,7 +73,8 @@ class TokenUsage: @dataclass(frozen=True) class AgentResponse: - """The complete response from a single AnthropicAgent.send() call.""" + """The complete response from a single AnthropicAgent.send() call. + Adds useful metadata to the response of the Anthropic API.""" stop_reason: StopReason text: str @@ -82,23 +83,36 @@ class AgentResponse: class AnthropicAgent: + """A wrapper around the Anthropic API that provides a simple interface for interacting with agents.""" + def __init__( self, client: anthropic.AsyncAnthropic, tools: ToolRegistry, system_prompt: str, name: str, - model: str = MODEL, - max_tokens: int = MAX_TOKENS, - tool_execution: bool = False, + model: str = DEFAULT_MODEL, + max_tokens: int = DEFAULT_MAX_TOKENS, + programmatic_tool_calling: bool = False, ) -> None: + """Initialize an AnthropicAgent. + Args: + client: The Anthropic client to use. + tools: The ToolRegistry to use (might not be used in every call if allowed_tools in send() is provided) + system_prompt: The system prompt to use. + name: The name of the agent. + model: The model to use. + max_tokens: The max tokens per response. + programmatic_tool_calling: Whether to allow programmatic tool calling. + """ + self._client = client self._tools = tools self._system_prompt = system_prompt self.name = name self._model = model self._max_tokens = max_tokens - self._tool_execution = tool_execution + self._programmatic_tool_calling = programmatic_tool_calling self._history: list[MessageParam] = [] self._context_window: int | None = None @@ -123,7 +137,7 @@ def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list[ToolPar if allowed_tools is not None: allowed = set(allowed_tools) definitions = [d for d in definitions if d["name"] in allowed] - if not self._tool_execution: + if not self._programmatic_tool_calling: definitions = [{**d, "allowed_callers": ALLOWED_TOOL_CALLERS} for d in definitions] return definitions @@ -132,6 +146,13 @@ async def send( content: str | list[ToolResultBlockParam], allowed_tools: list[str] | None = None, ) -> AgentResponse: + """Send a message to the agent and return the response. + Args: + content: The content to send to the agent. + allowed_tools: The tools in the ToolRegistry to allow the agent to use. + Returns: + An AgentResponse object containing the response from the agent. + """ tool_defs = self._get_tool_definitions(allowed_tools) user_msg: MessageParam = {"role": "user", "content": content} diff --git a/ddev/tests/ai/agent/test_client.py b/ddev/tests/ai/agent/test_client.py index e67a6ee8d0785..f4d1b9f5e8c96 100644 --- a/ddev/tests/ai/agent/test_client.py +++ b/ddev/tests/ai/agent/test_client.py @@ -2,7 +2,6 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock @@ -94,12 +93,12 @@ def make_agent( # --------------------------------------------------------------------------- -def test_end_turn_single_text_block() -> None: +async def test_end_turn_single_text_block() -> None: content = [make_text_block("Hello!")] resp = make_response("end_turn", content) agent, _ = make_agent(mock_response=resp) - result = asyncio.run(agent.send("Hi")) + result = await agent.send("Hi") assert result.stop_reason is StopReason.END_TURN assert result.text == "Hello!" @@ -114,12 +113,12 @@ def test_end_turn_single_text_block() -> None: # --------------------------------------------------------------------------- -def test_tool_use_single_block() -> None: +async def test_tool_use_single_block() -> None: block = make_tool_use_block(id="toolu_42", name="read_file", input={"path": "/etc/hosts"}) resp = make_response("tool_use", [block]) agent, _ = make_agent(mock_response=resp) - result = asyncio.run(agent.send("Read hosts")) + result = await agent.send("Read hosts") assert result.stop_reason is StopReason.TOOL_USE assert len(result.tool_calls) == 1 @@ -134,7 +133,7 @@ def test_tool_use_single_block() -> None: # --------------------------------------------------------------------------- -def test_mixed_text_and_tool_use() -> None: +async def test_mixed_text_and_tool_use() -> None: content = [ make_text_block("I'll read the file for you."), make_tool_use_block(id="toolu_01", name="read_file"), @@ -142,7 +141,7 @@ def test_mixed_text_and_tool_use() -> None: resp = make_response("tool_use", content) agent, _ = make_agent(mock_response=resp) - result = asyncio.run(agent.send("Read a file")) + result = await agent.send("Read a file") assert result.text == "I'll read the file for you." assert len(result.tool_calls) == 1 @@ -153,12 +152,12 @@ def test_mixed_text_and_tool_use() -> None: # --------------------------------------------------------------------------- -def test_multiple_text_blocks_are_concatenated() -> None: +async def test_multiple_text_blocks_are_concatenated() -> None: content = [make_text_block("Hello, "), make_text_block("world!")] resp = make_response("end_turn", content) agent, _ = make_agent(mock_response=resp) - result = asyncio.run(agent.send("Hi")) + result = await agent.send("Hi") assert result.text == "Hello, \nworld!" @@ -168,11 +167,11 @@ def test_multiple_text_blocks_are_concatenated() -> None: # --------------------------------------------------------------------------- -def test_max_tokens_is_not_an_error() -> None: +async def test_max_tokens_is_not_an_error() -> None: resp = make_response("max_tokens", [make_text_block("Truncated...")]) agent, _ = make_agent(mock_response=resp) - result = asyncio.run(agent.send("Tell me everything")) + result = await agent.send("Tell me everything") assert result.stop_reason is StopReason.MAX_TOKENS assert len(agent.history) == 2 @@ -203,34 +202,34 @@ async def run(self, raw: dict) -> ToolResult: pass -def test_allowed_tools_filters_to_subset() -> None: +async def test_allowed_tools_filters_to_subset() -> None: registry = ToolRegistry([FakeTool(n) for n in ["read_file", "grep", "mkdir"]]) resp = make_response("end_turn", [make_text_block("ok")]) agent, create_mock = make_agent(tools=registry, mock_response=resp) - asyncio.run(agent.send("Hi", allowed_tools=["read_file"])) + await agent.send("Hi", allowed_tools=["read_file"]) sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]] assert sent_names == ["read_file"] -def test_allowed_tools_none_passes_all() -> None: +async def test_allowed_tools_none_passes_all() -> None: registry = ToolRegistry([FakeTool(n) for n in ["a", "b"]]) resp = make_response("end_turn", [make_text_block("ok")]) agent, create_mock = make_agent(tools=registry, mock_response=resp) - asyncio.run(agent.send("Hi", allowed_tools=None)) + await agent.send("Hi", allowed_tools=None) sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]] assert sent_names == ["a", "b"] @pytest.mark.parametrize("allowed_tools", [[], ["nonexistent_tool"]]) -def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None: +async def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None: resp = make_response("end_turn", [make_text_block("ok")]) agent, create_mock = make_agent(mock_response=resp) - asyncio.run(agent.send("Hi", allowed_tools=allowed_tools)) + await agent.send("Hi", allowed_tools=allowed_tools) assert create_mock.call_args.kwargs["tools"] is anthropic.NOT_GIVEN @@ -247,17 +246,17 @@ def _make_error_agent(side_effect: Exception) -> AnthropicAgent: return AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") -def test_connection_error_maps_to_agent_connection_error() -> None: +async def test_connection_error_maps_to_agent_connection_error() -> None: agent = _make_error_agent(anthropic.APIConnectionError(request=MagicMock())) with pytest.raises(AgentConnectionError) as exc_info: - asyncio.run(agent.send("Hi")) + await agent.send("Hi") assert "Connection failed" in str(exc_info.value) assert agent.history == [] -def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None: +async def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None: agent = _make_error_agent( anthropic.RateLimitError( message="rate limit", @@ -267,13 +266,13 @@ def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None: ) with pytest.raises(AgentRateLimitError) as exc_info: - asyncio.run(agent.send("Hi")) + await agent.send("Hi") assert "Rate limit exceeded" in str(exc_info.value) assert agent.history == [] -def test_api_status_error_maps_to_agent_api_error() -> None: +async def test_api_status_error_maps_to_agent_api_error() -> None: agent = _make_error_agent( anthropic.APIStatusError( message="internal server error", @@ -283,17 +282,17 @@ def test_api_status_error_maps_to_agent_api_error() -> None: ) with pytest.raises(AgentAPIError) as exc_info: - asyncio.run(agent.send("Hi")) + await agent.send("Hi") assert exc_info.value.status_code == 500 assert agent.history == [] -def test_response_validation_error_maps_to_agent_error() -> None: +async def test_response_validation_error_maps_to_agent_error() -> None: agent = _make_error_agent(anthropic.APIResponseValidationError(response=MagicMock(), body=None)) with pytest.raises(AgentError) as exc_info: - asyncio.run(agent.send("Hi")) + await agent.send("Hi") assert "Response validation failed" in str(exc_info.value) assert agent.history == [] @@ -304,12 +303,12 @@ def test_response_validation_error_maps_to_agent_error() -> None: # --------------------------------------------------------------------------- -def test_unknown_stop_reason_raises_agent_error() -> None: +async def test_unknown_stop_reason_raises_agent_error() -> None: resp = make_response("totally_unknown_reason", []) agent, _ = make_agent(mock_response=resp) with pytest.raises(AgentError) as exc_info: - asyncio.run(agent.send("Hi")) + await agent.send("Hi") assert agent.history == [] assert "Unknown stop_reason" in str(exc_info.value) @@ -321,12 +320,12 @@ def test_unknown_stop_reason_raises_agent_error() -> None: # --------------------------------------------------------------------------- -def test_cache_tokens_none_defaults_to_zero() -> None: +async def test_cache_tokens_none_defaults_to_zero() -> None: usage = make_usage(cache_read=None, cache_creation=None) resp = make_response("end_turn", [make_text_block("ok")], usage=usage) agent, _ = make_agent(mock_response=resp) - result = asyncio.run(agent.send("Hi")) + result = await agent.send("Hi") assert result.usage.cache_read_input_tokens == 0 assert result.usage.cache_creation_input_tokens == 0 @@ -337,12 +336,12 @@ def test_cache_tokens_none_defaults_to_zero() -> None: # --------------------------------------------------------------------------- -def test_context_usage_fields() -> None: +async def test_context_usage_fields() -> None: usage = make_usage(input_tokens=1000, cache_read=500, cache_creation=200) resp = make_response("end_turn", [make_text_block("ok")], usage=usage) agent, _ = make_agent(mock_response=resp) - result = asyncio.run(agent.send("Hi")) + result = await agent.send("Hi") ctx = result.usage.context assert ctx.window_size == FAKE_CONTEXT_WINDOW @@ -356,13 +355,13 @@ def test_context_usage_fields() -> None: # --------------------------------------------------------------------------- -def test_context_window_fetched_once() -> None: +async def test_context_window_fetched_once() -> None: resp = make_response("end_turn", [make_text_block("ok")]) agent, _ = make_agent(mock_response=resp) agent._client.messages.create = AsyncMock(return_value=resp) - asyncio.run(agent.send("First")) - asyncio.run(agent.send("Second")) + await agent.send("First") + await agent.send("Second") agent._client.models.retrieve.assert_awaited_once() @@ -372,7 +371,7 @@ def test_context_window_fetched_once() -> None: # --------------------------------------------------------------------------- -def test_multi_turn_history_grows_correctly() -> None: +async def test_multi_turn_history_grows_correctly() -> None: tool_resp = make_response("tool_use", [make_tool_use_block(id="toolu_01")]) text_resp = make_response("end_turn", [make_text_block("Done.")]) @@ -383,12 +382,12 @@ def test_multi_turn_history_grows_correctly() -> None: client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") - first = asyncio.run(agent.send("Do X")) + first = await agent.send("Do X") assert first.stop_reason is StopReason.TOOL_USE assert len(agent.history) == 2 tool_results = [{"type": "tool_result", "tool_use_id": "toolu_01", "content": "result"}] - second = asyncio.run(agent.send(tool_results)) + second = await agent.send(tool_results) assert second.stop_reason is StopReason.END_TURN assert len(agent.history) == 4 assert agent.history[2]["role"] == "user" @@ -400,10 +399,10 @@ def test_multi_turn_history_grows_correctly() -> None: # --------------------------------------------------------------------------- -def test_history_property_returns_copy() -> None: +async def test_history_property_returns_copy() -> None: resp = make_response("end_turn", [make_text_block("ok")]) agent, _ = make_agent(mock_response=resp) - asyncio.run(agent.send("Hi")) + await agent.send("Hi") snapshot = agent.history snapshot.clear() @@ -416,10 +415,10 @@ def test_history_property_returns_copy() -> None: # --------------------------------------------------------------------------- -def test_reset_clears_history() -> None: +async def test_reset_clears_history() -> None: resp = make_response("end_turn", [make_text_block("ok")]) agent, _ = make_agent(mock_response=resp) - asyncio.run(agent.send("Hi")) + await agent.send("Hi") assert len(agent.history) == 2 agent.reset() @@ -431,7 +430,7 @@ def test_reset_clears_history() -> None: # --------------------------------------------------------------------------- -def test_error_mid_conversation_leaves_history_unchanged() -> None: +async def test_error_mid_conversation_leaves_history_unchanged() -> None: ok_resp = make_response("end_turn", [make_text_block("ok")]) client = MagicMock(spec=anthropic.AsyncAnthropic) client.messages = MagicMock() @@ -445,10 +444,10 @@ def test_error_mid_conversation_leaves_history_unchanged() -> None: client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") - asyncio.run(agent.send("First message")) + await agent.send("First message") history_after_first = agent.history[:] with pytest.raises(AgentConnectionError): - asyncio.run(agent.send("Second message")) + await agent.send("Second message") assert agent.history == history_after_first diff --git a/ddev/tests/ai/tools/core/test_base.py b/ddev/tests/ai/tools/core/test_base.py index 96cd0f8b07d0c..35e94f750a69e 100644 --- a/ddev/tests/ai/tools/core/test_base.py +++ b/ddev/tests/ai/tools/core/test_base.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from typing import Annotated import pytest @@ -194,8 +193,8 @@ async def __call__(self, tool_input: SimpleInput) -> ToolResult: # --- run(): happy path --- -def test_run_valid_input_returns_success(echo_tool: EchoTool): - result = asyncio.run(echo_tool.run({"message": "hello"})) +async def test_run_valid_input_returns_success(echo_tool: EchoTool): + result = await echo_tool.run({"message": "hello"}) assert result.success is True assert result.data == "hello" @@ -210,8 +209,8 @@ def test_run_valid_input_returns_success(echo_tool: EchoTool): {"message": "hi", "extra": "oops"}, ], ) -def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict): - result = asyncio.run(echo_tool.run(raw)) +async def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict): + result = await echo_tool.run(raw) assert result.success is False assert result.error is not None @@ -219,8 +218,8 @@ def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict): # --- run(): __call__ exception handling --- -def test_run_captures_exception_from_call(failing_tool: FailingTool): - result = asyncio.run(failing_tool.run({"message": "boom"})) +async def test_run_captures_exception_from_call(failing_tool: FailingTool): + result = await failing_tool.run({"message": "boom"}) assert isinstance(result, ToolResult) assert result.success is False assert "RuntimeError" in result.error diff --git a/ddev/tests/ai/tools/core/test_registry.py b/ddev/tests/ai/tools/core/test_registry.py index 245b3f67490d1..1366a9d8b5be8 100644 --- a/ddev/tests/ai/tools/core/test_registry.py +++ b/ddev/tests/ai/tools/core/test_registry.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio import pytest @@ -88,41 +87,41 @@ def test_definition_contains_tool_name(): # --------------------------------------------------------------------------- -def test_run_dispatches_to_correct_tool(): +async def test_run_dispatches_to_correct_tool(): tool_a = FakeTool("a", ToolResult(success=True, data="from a")) tool_b = FakeTool("b", ToolResult(success=True, data="from b")) registry = ToolRegistry([tool_a, tool_b]) - result = asyncio.run(registry.run("b", {})) + result = await registry.run("b", {}) assert result.success is True assert result.data == "from b" -def test_passes_raw_dict_to_tool_unchanged(): +async def test_passes_raw_dict_to_tool_unchanged(): tool = FakeTool("t") registry = ToolRegistry([tool]) raw = {"key": "value", "num": 42} - asyncio.run(registry.run("t", raw)) + await registry.run("t", raw) assert tool.last_raw == raw -def test_returns_tool_result_on_tool_failure(): +async def test_returns_tool_result_on_tool_failure(): registry = ToolRegistry([FakeTool("t", ToolResult(success=False, error="bad input"))]) - result = asyncio.run(registry.run("t", {})) + result = await registry.run("t", {}) assert result.success is False assert result.error == "bad input" -def test_unknown_tool_returns_failure(): +async def test_unknown_tool_returns_failure(): registry = ToolRegistry([FakeTool("known_tool")]) - result = asyncio.run(registry.run("unknown_tool", {})) + result = await registry.run("unknown_tool", {}) assert result.success is False assert "Unknown tool: 'unknown_tool'" in result.error -def test_empty_registry_always_returns_unknown_error(): +async def test_empty_registry_always_returns_unknown_error(): registry = ToolRegistry([]) - result = asyncio.run(registry.run("anything", {})) + result = await registry.run("anything", {}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/conftest.py b/ddev/tests/ai/tools/fs/conftest.py index 8d6677b98c398..12ae9e34eb1d5 100644 --- a/ddev/tests/ai/tools/fs/conftest.py +++ b/ddev/tests/ai/tools/fs/conftest.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio import pytest @@ -38,8 +37,8 @@ def append_tool(registry: FileRegistry) -> AppendFileTool: @pytest.fixture -def known_file(tmp_path, create_tool: CreateFileTool): +async def known_file(tmp_path, create_tool: CreateFileTool): """A temp file registered in the registry via create.""" f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "line one\nline two\nline three\n"})) + await create_tool.run({"path": str(f), "content": "line one\nline two\nline three\n"}) return f diff --git a/ddev/tests/ai/tools/fs/test_append_file.py b/ddev/tests/ai/tools/fs/test_append_file.py index 2b669572d30bb..289142e378191 100644 --- a/ddev/tests/ai/tools/fs/test_append_file.py +++ b/ddev/tests/ai/tools/fs/test_append_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch import pytest @@ -23,8 +22,10 @@ def test_tool_name(registry: FileRegistry) -> None: ("A\r\nB\r\n", "A\nB\n", "\r"), ], ) -def test_append_file_success(append_tool: AppendFileTool, known_file, content, expected_in, expected_not_in) -> None: - result = asyncio.run(append_tool.run({"path": str(known_file), "content": content})) +async def test_append_file_success( + append_tool: AppendFileTool, known_file, content, expected_in, expected_not_in +) -> None: + result = await append_tool.run({"path": str(known_file), "content": content}) assert result.success is True text = known_file.read_text(encoding="utf-8") @@ -33,11 +34,11 @@ def test_append_file_success(append_tool: AppendFileTool, known_file, content, e assert expected_not_in not in text -def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tmp_path) -> None: +async def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tmp_path) -> None: f = tmp_path / "unread.txt" f.write_text("content", encoding="utf-8") - result = asyncio.run(append_tool.run({"path": str(f), "content": "more"})) + result = await append_tool.run({"path": str(f), "content": "more"}) assert result.success is False assert "Not authorized" in result.error @@ -50,39 +51,39 @@ def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tm ("", "first line", "first line"), ], ) -def test_append_file_separator( +async def test_append_file_separator( append_tool: AppendFileTool, create_tool: CreateFileTool, tmp_path, initial, appended, expected ) -> None: f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": initial})) + await create_tool.run({"path": str(f), "content": initial}) - result = asyncio.run(append_tool.run({"path": str(f), "content": appended})) + result = await append_tool.run({"path": str(f), "content": appended}) assert result.success is True assert f.read_text(encoding="utf-8") == expected -def test_append_file_fails_if_file_changed_externally(append_tool: AppendFileTool, known_file) -> None: +async def test_append_file_fails_if_file_changed_externally(append_tool: AppendFileTool, known_file) -> None: known_file.write_text("externally modified\n", encoding="utf-8") - result = asyncio.run(append_tool.run({"path": str(known_file), "content": "more"})) + result = await append_tool.run({"path": str(known_file), "content": "more"}) assert result.success is False assert "Re-read and retry" in result.error -def test_append_file_updates_registry(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: - asyncio.run(append_tool.run({"path": str(known_file), "content": "extra\n"})) +async def test_append_file_updates_registry(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: + await append_tool.run({"path": str(known_file), "content": "extra\n"}) new_content = known_file.read_text(encoding="utf-8") assert registry.verify(str(known_file), new_content) is True -def test_append_file_oserror_on_write(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: +async def test_append_file_oserror_on_write(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: original_content = known_file.read_text(encoding="utf-8") with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(append_tool.run({"path": str(known_file), "content": "new line"})) + result = await append_tool.run({"path": str(known_file), "content": "new line"}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/test_create_file.py b/ddev/tests/ai/tools/fs/test_create_file.py index 2714ef5bb06aa..8b0c0296fa38a 100644 --- a/ddev/tests/ai/tools/fs/test_create_file.py +++ b/ddev/tests/ai/tools/fs/test_create_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch from ddev.ai.tools.fs.create_file import CreateFileTool @@ -12,41 +11,41 @@ def test_tool_name(registry: FileRegistry) -> None: assert CreateFileTool(registry).name == "create_file" -def test_create_file_success(create_tool: CreateFileTool, tmp_path) -> None: +async def test_create_file_success(create_tool: CreateFileTool, tmp_path) -> None: f = tmp_path / "new.txt" - result = asyncio.run(create_tool.run({"path": str(f), "content": "hello"})) + result = await create_tool.run({"path": str(f), "content": "hello"}) assert result.success is True assert f.read_text(encoding="utf-8") == "hello" -def test_create_file_default_empty_content(create_tool: CreateFileTool, tmp_path) -> None: +async def test_create_file_default_empty_content(create_tool: CreateFileTool, tmp_path) -> None: f = tmp_path / "empty.txt" - result = asyncio.run(create_tool.run({"path": str(f)})) + result = await create_tool.run({"path": str(f)}) assert result.success is True assert f.read_text(encoding="utf-8") == "" -def test_create_file_creates_missing_parent_dirs(create_tool: CreateFileTool, tmp_path) -> None: +async def test_create_file_creates_missing_parent_dirs(create_tool: CreateFileTool, tmp_path) -> None: f = tmp_path / "a" / "b" / "c" / "file.txt" - result = asyncio.run(create_tool.run({"path": str(f), "content": "nested"})) + result = await create_tool.run({"path": str(f), "content": "nested"}) assert result.success is True assert f.exists() assert f.read_text(encoding="utf-8") == "nested" -def test_create_file_fails_if_file_already_exists( +async def test_create_file_fails_if_file_already_exists( create_tool: CreateFileTool, registry: FileRegistry, tmp_path ) -> None: f = tmp_path / "existing.txt" f.write_text("original", encoding="utf-8") - result = asyncio.run(create_tool.run({"path": str(f), "content": "new"})) + result = await create_tool.run({"path": str(f), "content": "new"}) assert result.success is False assert result.error is not None @@ -54,19 +53,19 @@ def test_create_file_fails_if_file_already_exists( assert not registry.is_known(str(f)) -def test_create_tool_registers_in_registry(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_create_tool_registers_in_registry(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "hi"})) + await create_tool.run({"path": str(f), "content": "hi"}) assert registry.is_known(str(f)) is True assert registry.verify(str(f), "hi") is True -def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "a" / "b" / "new.txt" with patch("pathlib.Path.mkdir", side_effect=PermissionError("permission denied")): - result = asyncio.run(create_tool.run({"path": str(f), "content": "hi"})) + result = await create_tool.run({"path": str(f), "content": "hi"}) assert result.success is False assert result.error is not None @@ -74,11 +73,11 @@ def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: Fil assert not registry.is_known(str(f)) -def test_create_file_oserror_on_write(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_create_file_oserror_on_write(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "new.txt" with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(create_tool.run({"path": str(f), "content": "hi"})) + result = await create_tool.run({"path": str(f), "content": "hi"}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/test_edit_file.py b/ddev/tests/ai/tools/fs/test_edit_file.py index cbfd48a78c193..27c8b87cedce2 100644 --- a/ddev/tests/ai/tools/fs/test_edit_file.py +++ b/ddev/tests/ai/tools/fs/test_edit_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch import pytest @@ -15,8 +14,8 @@ def test_tool_name(registry: FileRegistry) -> None: assert EditFileTool(registry).name == "edit_file" -def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None: - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line two", "new_string": "line TWO"})) +async def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None: + result = await edit_tool.run({"path": str(known_file), "old_string": "line two", "new_string": "line TWO"}) assert result.success is True content = known_file.read_text(encoding="utf-8") @@ -24,54 +23,56 @@ def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None: assert "line two" not in content -def test_edit_file_deletes_line(edit_tool: EditFileTool, known_file) -> None: - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line two\n", "new_string": ""})) +async def test_edit_file_deletes_line(edit_tool: EditFileTool, known_file) -> None: + result = await edit_tool.run({"path": str(known_file), "old_string": "line two\n", "new_string": ""}) assert result.success is True assert "line two" not in known_file.read_text(encoding="utf-8") -def test_edit_file_fails_for_unregistered_file(edit_tool: EditFileTool, tmp_path) -> None: +async def test_edit_file_fails_for_unregistered_file(edit_tool: EditFileTool, tmp_path) -> None: f = tmp_path / "unread.txt" f.write_text("content", encoding="utf-8") - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "content", "new_string": "new"})) + result = await edit_tool.run({"path": str(f), "old_string": "content", "new_string": "new"}) assert result.success is False assert "Not authorized" in result.error @pytest.mark.parametrize("old_string", ["does not exist", ""]) -def test_edit_file_fails_if_old_string_not_found_or_empty(edit_tool: EditFileTool, known_file, old_string) -> None: - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": old_string, "new_string": "x"})) +async def test_edit_file_fails_if_old_string_not_found_or_empty( + edit_tool: EditFileTool, known_file, old_string +) -> None: + result = await edit_tool.run({"path": str(known_file), "old_string": old_string, "new_string": "x"}) assert result.success is False -def test_edit_file_fails_if_old_string_ambiguous( +async def test_edit_file_fails_if_old_string_ambiguous( edit_tool: EditFileTool, create_tool: CreateFileTool, tmp_path ) -> None: f = tmp_path / "dup.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "foo\nfoo\nfoo\n"})) + await create_tool.run({"path": str(f), "content": "foo\nfoo\nfoo\n"}) - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "foo", "new_string": "bar"})) + result = await edit_tool.run({"path": str(f), "old_string": "foo", "new_string": "bar"}) assert result.success is False assert "3" in result.error assert result.hint is not None -def test_edit_file_fails_if_file_changed_externally(edit_tool: EditFileTool, known_file) -> None: +async def test_edit_file_fails_if_file_changed_externally(edit_tool: EditFileTool, known_file) -> None: known_file.write_text("externally modified\n", encoding="utf-8") - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"})) + result = await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"}) assert result.success is False assert "Re-read and retry" in result.error -def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: - asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "LINE ONE"})) +async def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: + await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "LINE ONE"}) new_content = known_file.read_text(encoding="utf-8") assert registry.verify(str(known_file), new_content) is True @@ -85,23 +86,23 @@ def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegis ("line one\n", "line one", "A\r\nB", "A\nB\n"), # CRLF in new_string ], ) -def test_edit_file_normalizes_crlf( +async def test_edit_file_normalizes_crlf( edit_tool: EditFileTool, create_tool: CreateFileTool, tmp_path, file_content, old_string, new_string, expected ) -> None: f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": file_content})) + await create_tool.run({"path": str(f), "content": file_content}) - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": old_string, "new_string": new_string})) + result = await edit_tool.run({"path": str(f), "old_string": old_string, "new_string": new_string}) assert result.success is True assert f.read_text(encoding="utf-8") == expected -def test_edit_file_oserror_on_write(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: +async def test_edit_file_oserror_on_write(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: original_content = known_file.read_text(encoding="utf-8") with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"})) + result = await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/test_read_file.py b/ddev/tests/ai/tools/fs/test_read_file.py index f1b8da06d91ed..f2497e6c09a18 100644 --- a/ddev/tests/ai/tools/fs/test_read_file.py +++ b/ddev/tests/ai/tools/fs/test_read_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch import pytest @@ -14,47 +13,47 @@ def test_tool_name(registry: FileRegistry) -> None: assert ReadFileTool(registry).name == "read_file" -def test_read_file_success(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_success(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "config.txt" f.write_text("hello\nworld\n", encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is True assert result.data == "0: hello\n1: world\n" -def test_read_registers_unknown_file(read_tool: ReadFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_read_registers_unknown_file(read_tool: ReadFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "file.txt" f.write_text("content", encoding="utf-8") - asyncio.run(read_tool.run({"path": str(f)})) + await read_tool.run({"path": str(f)}) assert registry.is_known(str(f)) is True -def test_read_file_missing_file(read_tool: ReadFileTool, tmp_path) -> None: - result = asyncio.run(read_tool.run({"path": str(tmp_path / "ghost.txt")})) +async def test_read_file_missing_file(read_tool: ReadFileTool, tmp_path) -> None: + result = await read_tool.run({"path": str(tmp_path / "ghost.txt")}) assert result.success is False assert str(tmp_path / "ghost.txt") in result.error -def test_read_file_permission_error(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_permission_error(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "secret.txt" f.write_text("secret", encoding="utf-8") with patch("pathlib.Path.read_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is False assert result.error is not None -def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "binary.bin" f.write_bytes(b"\xff\xfe\x00binary") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is False assert result.error is not None @@ -71,23 +70,23 @@ def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None: (100, None, ""), # offset beyond EOF ], ) -def test_read_file_with_offset_and_limit(read_tool: ReadFileTool, tmp_path, offset, limit, expected) -> None: +async def test_read_file_with_offset_and_limit(read_tool: ReadFileTool, tmp_path, offset, limit, expected) -> None: f = tmp_path / "file.txt" f.write_text("a\nb\nc\n", encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f), "offset": offset, "limit": limit})) + result = await read_tool.run({"path": str(f), "offset": offset, "limit": limit}) assert result.success is True assert result.data == expected -def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None: from ddev.ai.tools.core.truncation import MAX_CHARS f = tmp_path / "large.txt" f.write_text("x" * (MAX_CHARS + 1000), encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is True assert result.truncated is True @@ -95,11 +94,11 @@ def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None: assert result.hint is not None -def test_read_file_no_trailing_newline(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_no_trailing_newline(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "file.txt" f.write_text("no newline at end", encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is True assert result.data == "0: no newline at end" diff --git a/ddev/tests/ai/tools/fs/test_workflow.py b/ddev/tests/ai/tools/fs/test_workflow.py index 077f63189bf91..a45ad9d937e26 100644 --- a/ddev/tests/ai/tools/fs/test_workflow.py +++ b/ddev/tests/ai/tools/fs/test_workflow.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from ddev.ai.tools.fs.append_file import AppendFileTool from ddev.ai.tools.fs.create_file import CreateFileTool @@ -10,7 +9,7 @@ from ddev.ai.tools.fs.read_file import ReadFileTool -def test_workflow_create_read_edit_append( +async def test_workflow_create_read_edit_append( create_tool: CreateFileTool, read_tool: ReadFileTool, edit_tool: EditFileTool, @@ -21,20 +20,20 @@ def test_workflow_create_read_edit_append( f = tmp_path / "workflow.txt" # Step 1: create - r = asyncio.run(create_tool.run({"path": str(f), "content": "version: 1\n"})) + r = await create_tool.run({"path": str(f), "content": "version: 1\n"}) assert r.success is True # Step 2: read (registers current content) - r = asyncio.run(read_tool.run({"path": str(f)})) + r = await read_tool.run({"path": str(f)}) assert r.success is True # Step 3: edit - r = asyncio.run(edit_tool.run({"path": str(f), "old_string": "version: 1", "new_string": "version: 2"})) + r = await edit_tool.run({"path": str(f), "old_string": "version: 1", "new_string": "version: 2"}) assert r.success is True assert "version: 2" in f.read_text(encoding="utf-8") # Step 4: append - r = asyncio.run(append_tool.run({"path": str(f), "content": "# updated\n"})) + r = await append_tool.run({"path": str(f), "content": "# updated\n"}) assert r.success is True assert f.read_text(encoding="utf-8").endswith("# updated\n") @@ -42,22 +41,22 @@ def test_workflow_create_read_edit_append( assert registry.verify(str(f), f.read_text(encoding="utf-8")) is True -def test_workflow_stale_file( +async def test_workflow_stale_file( create_tool: CreateFileTool, read_tool: ReadFileTool, edit_tool: EditFileTool, tmp_path, ) -> None: f = tmp_path / "shared.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "original\n"})) + await create_tool.run({"path": str(f), "content": "original\n"}) f.write_text("updated externally\n", encoding="utf-8") - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "original", "new_string": "my edit"})) + result = await edit_tool.run({"path": str(f), "old_string": "original", "new_string": "my edit"}) assert result.success is False assert "Re-read and retry" in result.error - asyncio.run(read_tool.run({"path": str(f)})) + await read_tool.run({"path": str(f)}) - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "updated externally", "new_string": "final"})) + result = await edit_tool.run({"path": str(f), "old_string": "updated externally", "new_string": "final"}) assert result.success is True assert f.read_text(encoding="utf-8") == "final\n" diff --git a/ddev/tests/ai/tools/http/test_http_get.py b/ddev/tests/ai/tools/http/test_http_get.py index d2e8c06220fa1..2cb871bdfd62a 100644 --- a/ddev/tests/ai/tools/http/test_http_get.py +++ b/ddev/tests/ai/tools/http/test_http_get.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import AsyncMock, MagicMock, patch import httpx @@ -51,8 +50,8 @@ def test_tool_meta(http_tool: HttpGetTool) -> None: @pytest.mark.parametrize("url", ["ftp://example.com", "example.com", "", "//example.com"]) -def test_invalid_url(http_tool: HttpGetTool, url: str) -> None: - result = asyncio.run(http_tool.run({"url": url})) +async def test_invalid_url(http_tool: HttpGetTool, url: str) -> None: + result = await http_tool.run({"url": url}) assert result.success is False assert "http" in result.error and "https" in result.error @@ -71,9 +70,9 @@ def test_invalid_url(http_tool: HttpGetTool, url: str) -> None: (204, ""), ], ) -def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> None: +async def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> None: with patch_httpx(fake_response(status_code, body)): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is True assert f"Status: {status_code}" in result.data @@ -81,9 +80,9 @@ def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> @pytest.mark.parametrize("status_code", [400, 404, 500, 503]) -def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> None: +async def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> None: with patch_httpx(fake_response(status_code, "error body")): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is True assert f"Status: {status_code}" in result.data @@ -94,17 +93,17 @@ def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> # --------------------------------------------------------------------------- -def test_request_timeout(http_tool: HttpGetTool) -> None: +async def test_request_timeout(http_tool: HttpGetTool) -> None: with patch_httpx(side_effect=httpx.TimeoutException("timed out")): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics", "timeout": 1.0})) + result = await http_tool.run({"url": "http://localhost:9090/metrics", "timeout": 1.0}) assert result.success is False assert "timed out after 1.0s" in result.error -def test_request_error(http_tool: HttpGetTool) -> None: +async def test_request_error(http_tool: HttpGetTool) -> None: with patch_httpx(side_effect=httpx.RequestError("connection refused")): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is False assert "Request failed" in result.error @@ -116,12 +115,12 @@ def test_request_error(http_tool: HttpGetTool) -> None: @pytest.mark.parametrize("status_code", [200, 500]) -def test_response_truncated(http_tool: HttpGetTool, status_code: int) -> None: +async def test_response_truncated(http_tool: HttpGetTool, status_code: int) -> None: from ddev.ai.tools.core.truncation import MAX_CHARS large_body = "x" * (MAX_CHARS + 1000) with patch_httpx(fake_response(status_code, large_body)): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is True assert result.truncated is True diff --git a/ddev/tests/ai/tools/shell/test_base.py b/ddev/tests/ai/tools/shell/test_base.py index 5d7431239a5e7..3568170b9092d 100644 --- a/ddev/tests/ai/tools/shell/test_base.py +++ b/ddev/tests/ai/tools/shell/test_base.py @@ -79,42 +79,42 @@ def slow_greet_tool() -> SlowGreetTool: # --------------------------------------------------------------------------- -def test_run_command_success(proc): +async def test_run_command_success(proc): with patch_proc(proc): - result = asyncio.run(run_command(["echo", "hello"])) + result = await run_command(["echo", "hello"]) assert result.success is True assert result.data == "hello\n" assert result.truncated is False -def test_run_command_failure_combines_stdout_and_stderr(): +async def test_run_command_failure_combines_stdout_and_stderr(): proc = make_proc(returncode=1, stdout=b"partial\n", stderr=b"error\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is False assert "partial" in result.data assert "error" in result.data -def test_run_command_failure_stderr_only_when_no_stdout(): +async def test_run_command_failure_stderr_only_when_no_stdout(): proc = make_proc(returncode=1, stdout=b"", stderr=b"fatal error\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is False and result.data == "fatal error\n" -def test_run_command_ignores_stderr_on_zero_exit(): +async def test_run_command_ignores_stderr_on_zero_exit(): proc = make_proc(returncode=0, stdout=b"out\n", stderr=b"warning\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is True assert "warning" not in result.data -def test_run_command_stderr_included_when_stdout_empty_on_success(): +async def test_run_command_stderr_included_when_stdout_empty_on_success(): proc = make_proc(returncode=0, stdout=b"", stderr=b"info: initialized\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is True assert result.data == "info: initialized\n" @@ -127,10 +127,10 @@ def test_run_command_stderr_included_when_stdout_empty_on_success(): (1, b"", b""), ], ) -def test_run_command_empty_output(returncode, stdout, stderr): +async def test_run_command_empty_output(returncode, stdout, stderr): proc = make_proc(returncode=returncode, stdout=stdout, stderr=stderr) with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.data == "(no output)" @@ -139,27 +139,27 @@ def test_run_command_empty_output(returncode, stdout, stderr): # --------------------------------------------------------------------------- -def test_run_command_not_found(): +async def test_run_command_not_found(): with patch("asyncio.create_subprocess_exec", side_effect=FileNotFoundError()): - result = asyncio.run(run_command(["nonexistent"])) + result = await run_command(["nonexistent"]) assert result.success is False assert "Command not found" in result.error assert "nonexistent" in result.error -def test_run_command_timeout(): +async def test_run_command_timeout(): proc = make_proc() with patch_proc(proc): with patch("asyncio.wait_for", new=_raise_timeout): - result = asyncio.run(run_command(["sleep", "100"], timeout=5)) + result = await run_command(["sleep", "100"], timeout=5) assert result.success is False assert "5s" in result.error proc.kill.assert_called_once() -def test_run_command_unexpected_exception(): +async def test_run_command_unexpected_exception(): with patch("asyncio.create_subprocess_exec", side_effect=OSError("permission denied")): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is False assert "OSError" in result.error assert "permission denied" in result.error @@ -170,21 +170,21 @@ def test_run_command_unexpected_exception(): # --------------------------------------------------------------------------- -def test_run_command_truncation(): +async def test_run_command_truncation(): large = ("x" * 80 + "\n") * 700 proc = make_proc(stdout=large.encode()) with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.truncated is True assert result.total_size == len(large) assert result.shown_size == len(result.data) assert result.hint is not None -def test_run_command_no_truncation_at_limit(): +async def test_run_command_no_truncation_at_limit(): proc = make_proc(stdout=("x" * MAX_CHARS).encode()) with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.truncated is False assert result.total_size is None assert result.hint is None @@ -200,10 +200,10 @@ def test_cmd_tool_timeouts(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool assert SlowGreetTool.timeout == 60 # custom timeout -def test_cmd_tool_dispatches_with_correct_timeout(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool): +async def test_cmd_tool_dispatches_with_correct_timeout(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool): for tool, expected_timeout in [(greet_tool, 10), (slow_greet_tool, 60)]: with patch( "ddev.ai.tools.shell.base.run_command", new=AsyncMock(return_value=ToolResult(success=True)) ) as mock_run: - asyncio.run(tool.run({"name": "world"})) + await tool.run({"name": "world"}) mock_run.assert_called_once_with(["echo", "hello world"], timeout=expected_timeout) diff --git a/ddev/tests/ai/tools/shell/test_tools.py b/ddev/tests/ai/tools/shell/test_tools.py index 81fcb45d3d3b1..05084acc97e9e 100644 --- a/ddev/tests/ai/tools/shell/test_tools.py +++ b/ddev/tests/ai/tools/shell/test_tools.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import AsyncMock, patch import pytest @@ -66,12 +65,12 @@ def test_grep_cmd_pattern_and_path_placement(grep_tool: GrepTool): assert cmd[-1] == "/my dir/sub dir" -def test_grep_no_matches_returns_success(grep_tool: GrepTool): +async def test_grep_no_matches_returns_success(grep_tool: GrepTool): from ddev.ai.tools.core.types import ToolResult no_match_result = ToolResult(success=False, data="(no output)", error=None) with patch("ddev.ai.tools.shell.grep.run_command", new=AsyncMock(return_value=no_match_result)): - result = asyncio.run(grep_tool(GrepInput(pattern="nomatch", path="/tmp"))) + result = await grep_tool(GrepInput(pattern="nomatch", path="/tmp")) assert result.success is True assert result.data == "(no output)"