From 9e4d08b4188bf6eaffeb0dc1b0e2ab3bd3a75fbf Mon Sep 17 00:00:00 2001
From: Luis Orofino <luis.orofino@datadoghq.com>
Date: Wed, 25 Mar 2026 16:03:00 +0100
Subject: [PATCH 1/5] Implement AnthropicAgent, define types and add tests

---
 ddev/src/ddev/ai/agent/__init__.py |   3 +
 ddev/src/ddev/ai/agent/agent.py    | 125 +++++++++
 ddev/src/ddev/ai/agent/types.py    |  73 ++++++
 ddev/tests/ai/agent/__init__.py    |   3 +
 ddev/tests/ai/agent/test_agent.py  | 408 +++++++++++++++++++++++++++++
 5 files changed, 612 insertions(+)
 create mode 100644 ddev/src/ddev/ai/agent/__init__.py
 create mode 100644 ddev/src/ddev/ai/agent/agent.py
 create mode 100644 ddev/src/ddev/ai/agent/types.py
 create mode 100644 ddev/tests/ai/agent/__init__.py
 create mode 100644 ddev/tests/ai/agent/test_agent.py

diff --git a/ddev/src/ddev/ai/agent/__init__.py b/ddev/src/ddev/ai/agent/__init__.py
new file mode 100644
index 0000000000000..75c6647cb9233
--- /dev/null
+++ b/ddev/src/ddev/ai/agent/__init__.py
@@ -0,0 +1,3 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
diff --git a/ddev/src/ddev/ai/agent/agent.py b/ddev/src/ddev/ai/agent/agent.py
new file mode 100644
index 0000000000000..4cbed20072f26
--- /dev/null
+++ b/ddev/src/ddev/ai/agent/agent.py
@@ -0,0 +1,125 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+
+from typing import Final
+
+import anthropic
+from anthropic.types import MessageParam, ToolResultBlockParam
+
+from ddev.ai.tools.core.registry import ToolRegistry
+
+from .types import (
+    AgentAPIError,
+    AgentConnectionError,
+    AgentError,
+    AgentRateLimitError,
+    AgentResponse,
+    StopReason,
+    TokenUsage,
+    ToolCall,
+)
+
+MODEL: Final[str] = "claude-opus-4-6"
+MAX_TOKENS: Final[int] = 8192
+
+
+class AnthropicAgent:
+    def __init__(
+        self,
+        client: anthropic.AsyncAnthropic,
+        tools: ToolRegistry,
+        system_prompt: str,
+        name: str,
+        model: str = MODEL,
+        max_tokens: int = MAX_TOKENS,
+    ) -> None:
+        self._client = client
+        self._tools = tools
+        self._system_prompt = system_prompt
+        self.name = name
+        self._model = model
+        self._max_tokens = max_tokens
+        self._history: list[MessageParam] = []
+
+    @property
+    def history(self) -> list[MessageParam]:
+        """Read-only snapshot of the conversation history."""
+        return list(self._history)
+
+    def reset(self) -> None:
+        """Clear conversation history to start a new conversation."""
+        self._history = []
+
+    def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list:
+        """Filter tool definitions by allowlist. None means all tools."""
+        if allowed_tools is None:
+            return self._tools.definitions
+        allowed = set(allowed_tools)
+        return [d for d in self._tools.definitions if d["name"] in allowed]
+
+    async def send(
+        self,
+        content: str | list[ToolResultBlockParam],
+        allowed_tools: list[str] | None = None,
+    ) -> AgentResponse:
+        tool_defs = self._get_tool_definitions(allowed_tools)
+
+        user_msg: MessageParam = {"role": "user", "content": content}
+        messages = [*self._history, user_msg]
+
+        try:
+            response = await self._client.messages.create(
+                model=self._model,
+                max_tokens=self._max_tokens,
+                system=self._system_prompt,
+                messages=messages,
+                tools=tool_defs if tool_defs else anthropic.NOT_GIVEN,
+            )
+        except anthropic.APIConnectionError as e:
+            raise AgentConnectionError(f"Connection failed: {e}") from e
+        except anthropic.RateLimitError as e:
+            raise AgentRateLimitError(f"Rate limit exceeded: {e}") from e
+        except anthropic.APIStatusError as e:
+            raise AgentAPIError(e.status_code, e.message) from e
+        except anthropic.APIResponseValidationError as e:
+            raise AgentError(f"Response validation failed: {e}") from e
+
+        # stop_reason is None only in streaming responses; we use non-streaming, so None is unexpected
+        if response.stop_reason is None:
+            raise AgentError("Received null stop_reason from API")
+
+        try:
+            stop_reason = StopReason(response.stop_reason)
+        except ValueError:
+            raise AgentError(f"Unknown stop_reason: {response.stop_reason!r}") from None
+
+        text_parts: list[str] = []
+        tool_calls: list[ToolCall] = []
+
+        for block in response.content:
+            if isinstance(block, anthropic.types.TextBlock):
+                text_parts.append(block.text)
+            elif isinstance(block, anthropic.types.ToolUseBlock):
+                tool_calls.append(ToolCall(id=block.id, name=block.name, input=dict(block.input)))
+        # ThinkingBlock and RedactedThinkingBlock are intentionally ignored.
+        # Extended thinking support can add a `thinking: str` field to AgentResponse later.
+
+        usage = TokenUsage(
+            input_tokens=response.usage.input_tokens,
+            output_tokens=response.usage.output_tokens,
+            cache_read_input_tokens=response.usage.cache_read_input_tokens or 0,
+            cache_creation_input_tokens=response.usage.cache_creation_input_tokens or 0,
+        )
+
+        agent_response = AgentResponse(
+            stop_reason=stop_reason,
+            text="".join(text_parts),
+            tool_calls=tool_calls,
+            usage=usage,
+        )
+
+        # Save to history only after a successful response.
+        self._history = [*messages, {"role": "assistant", "content": response.content}]
+
+        return agent_response
diff --git a/ddev/src/ddev/ai/agent/types.py b/ddev/src/ddev/ai/agent/types.py
new file mode 100644
index 0000000000000..677ab92d25350
--- /dev/null
+++ b/ddev/src/ddev/ai/agent/types.py
@@ -0,0 +1,73 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+
+from dataclasses import dataclass
+from enum import StrEnum
+
+from pydantic import BaseModel
+
+
+class StopReason(StrEnum):
+    """Maps Anthropic API stop_reason strings to a typed enum."""
+
+    END_TURN = "end_turn"
+    MAX_TOKENS = "max_tokens"
+    STOP_SEQUENCE = "stop_sequence"
+    TOOL_USE = "tool_use"
+    PAUSE_TURN = "pause_turn"
+    REFUSAL = "refusal"
+
+
+@dataclass(frozen=True)
+class ToolCall:
+    """A single tool invocation requested by the model."""
+
+    id: str
+    name: str
+    input: dict[str, object]
+
+
+@dataclass(frozen=True)
+class TokenUsage:
+    """Token accounting from a single API call."""
+
+    input_tokens: int
+    output_tokens: int
+    cache_read_input_tokens: int
+    cache_creation_input_tokens: int
+
+
+class AgentResponse(BaseModel):
+    """The complete response from a single AnthropicAgent.send() call."""
+
+    stop_reason: StopReason
+    text: str
+    tool_calls: list[ToolCall]
+    usage: TokenUsage
+
+
+class AgentError(Exception):
+    """Base class for all errors raised by AnthropicAgent."""
+
+    pass
+
+
+class AgentConnectionError(AgentError):
+    """Network failure — the API was unreachable."""
+
+    pass
+
+
+class AgentRateLimitError(AgentError):
+    """Rate limit hit — the request may be retried after a delay."""
+
+    pass
+
+
+class AgentAPIError(AgentError):
+    """The API returned an error status code."""
+
+    def __init__(self, status_code: int, message: str) -> None:
+        super().__init__(message)
+        self.status_code = status_code
diff --git a/ddev/tests/ai/agent/__init__.py b/ddev/tests/ai/agent/__init__.py
new file mode 100644
index 0000000000000..75c6647cb9233
--- /dev/null
+++ b/ddev/tests/ai/agent/__init__.py
@@ -0,0 +1,3 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
diff --git a/ddev/tests/ai/agent/test_agent.py b/ddev/tests/ai/agent/test_agent.py
new file mode 100644
index 0000000000000..7fa04103b9328
--- /dev/null
+++ b/ddev/tests/ai/agent/test_agent.py
@@ -0,0 +1,408 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+
+import asyncio
+from collections.abc import Callable
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import anthropic
+import pytest
+
+from ddev.ai.agent.agent import AnthropicAgent
+from ddev.ai.agent.types import (
+    AgentAPIError,
+    AgentConnectionError,
+    AgentError,
+    AgentRateLimitError,
+    StopReason,
+)
+from ddev.ai.tools.core.registry import ToolRegistry
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def make_usage(
+    input_tokens: int = 10,
+    output_tokens: int = 20,
+    cache_read: int | None = None,
+    cache_creation: int | None = None,
+) -> SimpleNamespace:
+    return SimpleNamespace(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cache_read_input_tokens=cache_read,
+        cache_creation_input_tokens=cache_creation,
+    )
+
+
+def make_text_block(text: str) -> anthropic.types.TextBlock:
+    return anthropic.types.TextBlock(type="text", text=text)
+
+
+def make_tool_use_block(
+    id: str = "toolu_01",
+    name: str = "read_file",
+    input: dict | None = None,
+) -> anthropic.types.ToolUseBlock:
+    return anthropic.types.ToolUseBlock(
+        type="tool_use",
+        id=id,
+        name=name,
+        input=input or {"path": "/tmp/file.txt"},
+    )
+
+
+def make_response(
+    stop_reason: str | None,
+    content: list,
+    usage: SimpleNamespace | None = None,
+) -> SimpleNamespace:
+    return SimpleNamespace(
+        stop_reason=stop_reason,
+        content=content,
+        usage=usage or make_usage(),
+    )
+
+
+def make_agent(
+    tools: ToolRegistry | None = None,
+    mock_response: SimpleNamespace | None = None,
+) -> tuple[AnthropicAgent, AsyncMock]:
+    client = MagicMock(spec=anthropic.AsyncAnthropic)
+    client.messages = MagicMock()
+    client.messages.create = AsyncMock(return_value=mock_response or make_response("end_turn", []))
+    registry = tools or ToolRegistry([])
+    agent = AnthropicAgent(
+        client=client,
+        tools=registry,
+        system_prompt="You are helpful.",
+        name="test-agent",
+    )
+    return agent, client.messages.create
+
+
+# ---------------------------------------------------------------------------
+# end_turn with a single TextBlock
+# ---------------------------------------------------------------------------
+
+
+def test_end_turn_single_text_block() -> None:
+    content = [make_text_block("Hello!")]
+    resp = make_response("end_turn", content)
+    agent, _ = make_agent(mock_response=resp)
+
+    result = asyncio.run(agent.send("Hi"))
+
+    assert result.stop_reason is StopReason.END_TURN
+    assert result.text == "Hello!"
+    assert result.tool_calls == []
+    assert len(agent.history) == 2
+    assert agent.history[0] == {"role": "user", "content": "Hi"}
+    assert agent.history[1] == {"role": "assistant", "content": content}
+
+
+# ---------------------------------------------------------------------------
+# tool_use
+# ---------------------------------------------------------------------------
+
+
+def test_tool_use_single_block() -> None:
+    block = make_tool_use_block(id="toolu_42", name="read_file", input={"path": "/etc/hosts"})
+    resp = make_response("tool_use", [block])
+    agent, _ = make_agent(mock_response=resp)
+
+    result = asyncio.run(agent.send("Read hosts"))
+
+    assert result.stop_reason is StopReason.TOOL_USE
+    assert len(result.tool_calls) == 1
+    tc = result.tool_calls[0]
+    assert tc.id == "toolu_42"
+    assert tc.name == "read_file"
+    assert tc.input == {"path": "/etc/hosts"}
+
+
+# ---------------------------------------------------------------------------
+# mixed TextBlock + ToolUseBlock
+# ---------------------------------------------------------------------------
+
+
+def test_mixed_text_and_tool_use() -> None:
+    content = [
+        make_text_block("I'll read the file for you."),
+        make_tool_use_block(id="toolu_01", name="read_file"),
+    ]
+    resp = make_response("tool_use", content)
+    agent, _ = make_agent(mock_response=resp)
+
+    result = asyncio.run(agent.send("Read a file"))
+
+    assert result.text == "I'll read the file for you."
+    assert len(result.tool_calls) == 1
+
+
+# ---------------------------------------------------------------------------
+# Multiple TextBlocks are concatenated
+# ---------------------------------------------------------------------------
+
+
+def test_multiple_text_blocks_are_concatenated() -> None:
+    content = [make_text_block("Hello, "), make_text_block("world!")]
+    resp = make_response("end_turn", content)
+    agent, _ = make_agent(mock_response=resp)
+
+    result = asyncio.run(agent.send("Hi"))
+
+    assert result.text == "Hello, world!"
+
+
+# ---------------------------------------------------------------------------
+# max_tokens is a normal response (not an error)
+# ---------------------------------------------------------------------------
+
+
+def test_max_tokens_is_not_an_error() -> None:
+    resp = make_response("max_tokens", [make_text_block("Truncated...")])
+    agent, _ = make_agent(mock_response=resp)
+
+    result = asyncio.run(agent.send("Tell me everything"))
+
+    assert result.stop_reason is StopReason.MAX_TOKENS
+    assert len(agent.history) == 2
+
+
+# ---------------------------------------------------------------------------
+# allowed_tools filtering
+# ---------------------------------------------------------------------------
+
+
+class FakeTool:
+    def __init__(self, name: str) -> None:
+        self._name = name
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def description(self) -> str:
+        return ""
+
+    @property
+    def definition(self) -> dict:
+        return {"name": self._name, "description": "", "input_schema": {}}
+
+    async def run(self, raw: dict) -> None:
+        pass
+
+
+@pytest.mark.parametrize(
+    ("tool_names", "allowed_tools", "expected_names"),
+    [
+        (["read_file", "grep", "mkdir"], ["read_file"], ["read_file"]),
+        (["a", "b"], None, ["a", "b"]),
+    ],
+)
+def test_allowed_tools(
+    tool_names: list[str],
+    allowed_tools: list[str] | None,
+    expected_names: list[str],
+) -> None:
+    registry = ToolRegistry([FakeTool(n) for n in tool_names])
+    resp = make_response("end_turn", [make_text_block("ok")])
+    agent, create_mock = make_agent(tools=registry, mock_response=resp)
+
+    asyncio.run(agent.send("Hi", allowed_tools=allowed_tools))
+
+    sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]]
+    assert sent_names == expected_names
+
+
+@pytest.mark.parametrize("allowed_tools", [[], ["nonexistent_tool"]])
+def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None:
+    resp = make_response("end_turn", [make_text_block("ok")])
+    agent, create_mock = make_agent(mock_response=resp)
+
+    asyncio.run(agent.send("Hi", allowed_tools=allowed_tools))
+
+    assert create_mock.call_args.kwargs["tools"] is anthropic.NOT_GIVEN
+
+
+# ---------------------------------------------------------------------------
+# API errors map to the correct AgentError subclass
+# ---------------------------------------------------------------------------
+
+_mock_500 = MagicMock()
+_mock_500.status_code = 500
+
+
+@pytest.mark.parametrize(
+    "side_effect,expected_exc,extra_check",
+    [
+        (
+            anthropic.APIConnectionError(request=MagicMock()),
+            AgentConnectionError,
+            lambda e: "Connection failed" in str(e),
+        ),
+        (
+            anthropic.RateLimitError(
+                message="rate limit",
+                response=MagicMock(status_code=429, headers={}),
+                body=None,
+            ),
+            AgentRateLimitError,
+            lambda e: "Rate limit exceeded" in str(e),
+        ),
+        (
+            anthropic.APIStatusError(
+                message="internal server error",
+                response=_mock_500,
+                body=None,
+            ),
+            AgentAPIError,
+            lambda e: e.status_code == 500,
+        ),
+        (
+            anthropic.APIResponseValidationError(
+                response=MagicMock(),
+                body=None,
+            ),
+            AgentError,
+            lambda e: "Response validation failed" in str(e),
+        ),
+    ],
+)
+def test_api_errors_map_correctly(
+    side_effect: Exception,
+    expected_exc: type[AgentError],
+    extra_check: Callable[[AgentError], bool],
+) -> None:
+    client = MagicMock(spec=anthropic.AsyncAnthropic)
+    client.messages = MagicMock()
+    client.messages.create = AsyncMock(side_effect=side_effect)
+    agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
+
+    with pytest.raises(expected_exc) as exc_info:
+        asyncio.run(agent.send("Hi"))
+
+    assert extra_check(exc_info.value)
+    assert agent.history == []
+
+
+# ---------------------------------------------------------------------------
+# Unknown stop_reason raises AgentError, history unchanged
+# ---------------------------------------------------------------------------
+
+
+def test_unknown_stop_reason_raises_agent_error() -> None:
+    resp = make_response("totally_unknown_reason", [])
+    agent, _ = make_agent(mock_response=resp)
+
+    with pytest.raises(AgentError) as exc_info:
+        asyncio.run(agent.send("Hi"))
+
+    assert agent.history == []
+    assert "Unknown stop_reason" in str(exc_info.value)
+    assert "totally_unknown_reason" in str(exc_info.value)
+
+
+# ---------------------------------------------------------------------------
+# cache_read_input_tokens=None defaults to 0
+# ---------------------------------------------------------------------------
+
+
+def test_cache_tokens_none_defaults_to_zero() -> None:
+    usage = make_usage(cache_read=None, cache_creation=None)
+    resp = make_response("end_turn", [make_text_block("ok")], usage=usage)
+    agent, _ = make_agent(mock_response=resp)
+
+    result = asyncio.run(agent.send("Hi"))
+
+    assert result.usage.cache_read_input_tokens == 0
+    assert result.usage.cache_creation_input_tokens == 0
+
+
+# ---------------------------------------------------------------------------
+# Multi-turn — send str then send tool results → history has 4 entries
+# ---------------------------------------------------------------------------
+
+
+def test_multi_turn_history_grows_correctly() -> None:
+    tool_resp = make_response("tool_use", [make_tool_use_block(id="toolu_01")])
+    text_resp = make_response("end_turn", [make_text_block("Done.")])
+
+    client = MagicMock(spec=anthropic.AsyncAnthropic)
+    client.messages = MagicMock()
+    client.messages.create = AsyncMock(side_effect=[tool_resp, text_resp])
+    agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
+
+    first = asyncio.run(agent.send("Do X"))
+    assert first.stop_reason is StopReason.TOOL_USE
+    assert len(agent.history) == 2
+
+    tool_results = [{"type": "tool_result", "tool_use_id": "toolu_01", "content": "result"}]
+    second = asyncio.run(agent.send(tool_results))
+    assert second.stop_reason is StopReason.END_TURN
+    assert len(agent.history) == 4
+    assert agent.history[2]["role"] == "user"
+    assert agent.history[3]["role"] == "assistant"
+
+
+# ---------------------------------------------------------------------------
+# history property returns a copy
+# ---------------------------------------------------------------------------
+
+
+def test_history_property_returns_copy() -> None:
+    resp = make_response("end_turn", [make_text_block("ok")])
+    agent, _ = make_agent(mock_response=resp)
+    asyncio.run(agent.send("Hi"))
+
+    snapshot = agent.history
+    snapshot.clear()
+
+    assert len(agent.history) == 2
+
+
+# ---------------------------------------------------------------------------
+# reset() clears history
+# ---------------------------------------------------------------------------
+
+
+def test_reset_clears_history() -> None:
+    resp = make_response("end_turn", [make_text_block("ok")])
+    agent, _ = make_agent(mock_response=resp)
+    asyncio.run(agent.send("Hi"))
+    assert len(agent.history) == 2
+
+    agent.reset()
+    assert agent.history == []
+
+
+# ---------------------------------------------------------------------------
+# Error mid-conversation leaves history unchanged
+# ---------------------------------------------------------------------------
+
+
+def test_error_mid_conversation_leaves_history_unchanged() -> None:
+    ok_resp = make_response("end_turn", [make_text_block("ok")])
+    client = MagicMock(spec=anthropic.AsyncAnthropic)
+    client.messages = MagicMock()
+    client.messages.create = AsyncMock(
+        side_effect=[
+            ok_resp,
+            anthropic.APIConnectionError(request=MagicMock()),
+        ]
+    )
+    agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
+
+    asyncio.run(agent.send("First message"))
+    history_after_first = agent.history[:]
+
+    with pytest.raises(AgentConnectionError):
+        asyncio.run(agent.send("Second message"))
+
+    assert agent.history == history_after_first

From 953d3d2d200baf8af340be3b5dacd343e2301e6a Mon Sep 17 00:00:00 2001
From: Luis Orofino <luis.orofino@datadoghq.com>
Date: Wed, 25 Mar 2026 16:58:57 +0100
Subject: [PATCH 2/5] Fix some bugs and improve tests

---
 ddev/src/ddev/ai/agent/agent.py   |   8 +-
 ddev/src/ddev/ai/agent/types.py   |   5 +-
 ddev/tests/ai/agent/test_agent.py | 131 +++++++++++++++---------------
 3 files changed, 73 insertions(+), 71 deletions(-)

diff --git a/ddev/src/ddev/ai/agent/agent.py b/ddev/src/ddev/ai/agent/agent.py
index 4cbed20072f26..7659f7fbfe474 100644
--- a/ddev/src/ddev/ai/agent/agent.py
+++ b/ddev/src/ddev/ai/agent/agent.py
@@ -5,7 +5,7 @@
 from typing import Final
 
 import anthropic
-from anthropic.types import MessageParam, ToolResultBlockParam
+from anthropic.types import MessageParam, ToolParam, ToolResultBlockParam
 
 from ddev.ai.tools.core.registry import ToolRegistry
 
@@ -51,7 +51,7 @@ def reset(self) -> None:
         """Clear conversation history to start a new conversation."""
         self._history = []
 
-    def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list:
+    def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list[ToolParam]:
         """Filter tool definitions by allowlist. None means all tools."""
         if allowed_tools is None:
             return self._tools.definitions
@@ -91,8 +91,8 @@ async def send(
 
         try:
             stop_reason = StopReason(response.stop_reason)
-        except ValueError:
-            raise AgentError(f"Unknown stop_reason: {response.stop_reason!r}") from None
+        except ValueError as e:
+            raise AgentError(f"Unknown stop_reason: {response.stop_reason!r}") from e
 
         text_parts: list[str] = []
         tool_calls: list[ToolCall] = []
diff --git a/ddev/src/ddev/ai/agent/types.py b/ddev/src/ddev/ai/agent/types.py
index 677ab92d25350..1c4b701b84ddb 100644
--- a/ddev/src/ddev/ai/agent/types.py
+++ b/ddev/src/ddev/ai/agent/types.py
@@ -5,8 +5,6 @@
 from dataclasses import dataclass
 from enum import StrEnum
 
-from pydantic import BaseModel
-
 
 class StopReason(StrEnum):
     """Maps Anthropic API stop_reason strings to a typed enum."""
@@ -38,7 +36,8 @@ class TokenUsage:
     cache_creation_input_tokens: int
 
 
-class AgentResponse(BaseModel):
+@dataclass(frozen=True)
+class AgentResponse:
     """The complete response from a single AnthropicAgent.send() call."""
 
     stop_reason: StopReason
diff --git a/ddev/tests/ai/agent/test_agent.py b/ddev/tests/ai/agent/test_agent.py
index 7fa04103b9328..8372e2d00e519 100644
--- a/ddev/tests/ai/agent/test_agent.py
+++ b/ddev/tests/ai/agent/test_agent.py
@@ -3,7 +3,6 @@
 # Licensed under a 3-clause BSD style license (see LICENSE)
 
 import asyncio
-from collections.abc import Callable
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
 
@@ -19,6 +18,7 @@
     StopReason,
 )
 from ddev.ai.tools.core.registry import ToolRegistry
+from ddev.ai.tools.core.types import ToolResult
 
 # ---------------------------------------------------------------------------
 # Helpers
@@ -195,30 +195,30 @@ def description(self) -> str:
     def definition(self) -> dict:
         return {"name": self._name, "description": "", "input_schema": {}}
 
-    async def run(self, raw: dict) -> None:
+    async def run(self, raw: dict) -> ToolResult:
         pass
 
 
-@pytest.mark.parametrize(
-    ("tool_names", "allowed_tools", "expected_names"),
-    [
-        (["read_file", "grep", "mkdir"], ["read_file"], ["read_file"]),
-        (["a", "b"], None, ["a", "b"]),
-    ],
-)
-def test_allowed_tools(
-    tool_names: list[str],
-    allowed_tools: list[str] | None,
-    expected_names: list[str],
-) -> None:
-    registry = ToolRegistry([FakeTool(n) for n in tool_names])
+def test_allowed_tools_filters_to_subset() -> None:
+    registry = ToolRegistry([FakeTool(n) for n in ["read_file", "grep", "mkdir"]])
     resp = make_response("end_turn", [make_text_block("ok")])
     agent, create_mock = make_agent(tools=registry, mock_response=resp)
 
-    asyncio.run(agent.send("Hi", allowed_tools=allowed_tools))
+    asyncio.run(agent.send("Hi", allowed_tools=["read_file"]))
+
+    sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]]
+    assert sent_names == ["read_file"]
+
+
+def test_allowed_tools_none_passes_all() -> None:
+    registry = ToolRegistry([FakeTool(n) for n in ["a", "b"]])
+    resp = make_response("end_turn", [make_text_block("ok")])
+    agent, create_mock = make_agent(tools=registry, mock_response=resp)
+
+    asyncio.run(agent.send("Hi", allowed_tools=None))
 
     sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]]
-    assert sent_names == expected_names
+    assert sent_names == ["a", "b"]
 
 
 @pytest.mark.parametrize("allowed_tools", [[], ["nonexistent_tool"]])
@@ -235,60 +235,63 @@ def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None:
 # API errors map to the correct AgentError subclass
 # ---------------------------------------------------------------------------
 
-_mock_500 = MagicMock()
-_mock_500.status_code = 500
 
-
-@pytest.mark.parametrize(
-    "side_effect,expected_exc,extra_check",
-    [
-        (
-            anthropic.APIConnectionError(request=MagicMock()),
-            AgentConnectionError,
-            lambda e: "Connection failed" in str(e),
-        ),
-        (
-            anthropic.RateLimitError(
-                message="rate limit",
-                response=MagicMock(status_code=429, headers={}),
-                body=None,
-            ),
-            AgentRateLimitError,
-            lambda e: "Rate limit exceeded" in str(e),
-        ),
-        (
-            anthropic.APIStatusError(
-                message="internal server error",
-                response=_mock_500,
-                body=None,
-            ),
-            AgentAPIError,
-            lambda e: e.status_code == 500,
-        ),
-        (
-            anthropic.APIResponseValidationError(
-                response=MagicMock(),
-                body=None,
-            ),
-            AgentError,
-            lambda e: "Response validation failed" in str(e),
-        ),
-    ],
-)
-def test_api_errors_map_correctly(
-    side_effect: Exception,
-    expected_exc: type[AgentError],
-    extra_check: Callable[[AgentError], bool],
-) -> None:
+def _make_error_agent(side_effect: Exception) -> AnthropicAgent:
     client = MagicMock(spec=anthropic.AsyncAnthropic)
     client.messages = MagicMock()
     client.messages.create = AsyncMock(side_effect=side_effect)
-    agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
+    return AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
+
+
+def test_connection_error_maps_to_agent_connection_error() -> None:
+    agent = _make_error_agent(anthropic.APIConnectionError(request=MagicMock()))
+
+    with pytest.raises(AgentConnectionError) as exc_info:
+        asyncio.run(agent.send("Hi"))
+
+    assert "Connection failed" in str(exc_info.value)
+    assert agent.history == []
+
+
+def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None:
+    agent = _make_error_agent(
+        anthropic.RateLimitError(
+            message="rate limit",
+            response=MagicMock(status_code=429, headers={}),
+            body=None,
+        )
+    )
+
+    with pytest.raises(AgentRateLimitError) as exc_info:
+        asyncio.run(agent.send("Hi"))
+
+    assert "Rate limit exceeded" in str(exc_info.value)
+    assert agent.history == []
+
 
-    with pytest.raises(expected_exc) as exc_info:
+def test_api_status_error_maps_to_agent_api_error() -> None:
+    agent = _make_error_agent(
+        anthropic.APIStatusError(
+            message="internal server error",
+            response=MagicMock(status_code=500),
+            body=None,
+        )
+    )
+
+    with pytest.raises(AgentAPIError) as exc_info:
+        asyncio.run(agent.send("Hi"))
+
+    assert exc_info.value.status_code == 500
+    assert agent.history == []
+
+
+def test_response_validation_error_maps_to_agent_error() -> None:
+    agent = _make_error_agent(anthropic.APIResponseValidationError(response=MagicMock(), body=None))
+
+    with pytest.raises(AgentError) as exc_info:
         asyncio.run(agent.send("Hi"))
 
-    assert extra_check(exc_info.value)
+    assert "Response validation failed" in str(exc_info.value)
     assert agent.history == []
 
 

From 57ed67d14b2d0492704a834b33c0e8bd80a6e8d0 Mon Sep 17 00:00:00 2001
From: Luis Orofino <luis.orofino@datadoghq.com>
Date: Wed, 25 Mar 2026 18:09:21 +0100
Subject: [PATCH 3/5] Rename agent.py to client.py and join TextBlocks with \n

---
 .../src/ddev/ai/agent/{agent.py => client.py} | 57 ++++++++++++---
 ddev/src/ddev/ai/agent/exceptions.py          | 29 ++++++++
 ddev/src/ddev/ai/agent/types.py               | 72 -------------------
 ddev/tests/ai/agent/test_agent.py             |  7 +-
 4 files changed, 80 insertions(+), 85 deletions(-)
 rename ddev/src/ddev/ai/agent/{agent.py => client.py} (77%)
 create mode 100644 ddev/src/ddev/ai/agent/exceptions.py
 delete mode 100644 ddev/src/ddev/ai/agent/types.py

diff --git a/ddev/src/ddev/ai/agent/agent.py b/ddev/src/ddev/ai/agent/client.py
similarity index 77%
rename from ddev/src/ddev/ai/agent/agent.py
rename to ddev/src/ddev/ai/agent/client.py
index 7659f7fbfe474..b0b3f8e874c05 100644
--- a/ddev/src/ddev/ai/agent/agent.py
+++ b/ddev/src/ddev/ai/agent/client.py
@@ -2,6 +2,9 @@
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
 
+from copy import deepcopy
+from dataclasses import dataclass
+from enum import StrEnum
 from typing import Final
 
 import anthropic
@@ -9,21 +12,57 @@
 
 from ddev.ai.tools.core.registry import ToolRegistry
 
-from .types import (
+from .exceptions import (
     AgentAPIError,
     AgentConnectionError,
     AgentError,
     AgentRateLimitError,
-    AgentResponse,
-    StopReason,
-    TokenUsage,
-    ToolCall,
 )
 
-MODEL: Final[str] = "claude-opus-4-6"
+MODEL: Final[str] = "claude-sonnet-4-6"
 MAX_TOKENS: Final[int] = 8192
 
 
+class StopReason(StrEnum):
+    """Typed enum of Anthropic API stop_reason strings; each value is the exact wire string."""
+
+    END_TURN = "end_turn"  # model finished its turn
+    MAX_TOKENS = "max_tokens"  # response truncated at the token limit; not treated as an error
+    STOP_SEQUENCE = "stop_sequence"
+    TOOL_USE = "tool_use"  # model requested one or more tool calls
+    PAUSE_TURN = "pause_turn"
+    REFUSAL = "refusal"
+
+
+@dataclass(frozen=True)
+class ToolCall:
+    """A single tool invocation requested by the model (one per tool_use block in the response)."""
+
+    id: str
+    name: str
+    input: dict[str, object]
+
+
+@dataclass(frozen=True)
+class TokenUsage:
+    """Token accounting from a single API call."""
+
+    input_tokens: int
+    output_tokens: int
+    cache_read_input_tokens: int
+    cache_creation_input_tokens: int
+
+
+@dataclass(frozen=True)
+class AgentResponse:
+    """The complete response from a single AnthropicAgent.send() call."""
+
+    stop_reason: StopReason
+    text: str
+    tool_calls: list[ToolCall]
+    usage: TokenUsage
+
+
 class AnthropicAgent:
     def __init__(
         self,
@@ -45,7 +84,7 @@ def __init__(
     @property
     def history(self) -> list[MessageParam]:
         """Read-only snapshot of the conversation history."""
-        return list(self._history)
+        return deepcopy(self._history)
 
     def reset(self) -> None:
         """Clear conversation history to start a new conversation."""
@@ -114,12 +153,12 @@ async def send(
 
         agent_response = AgentResponse(
             stop_reason=stop_reason,
-            text="".join(text_parts),
+            text="\n".join(text_parts),
             tool_calls=tool_calls,
             usage=usage,
         )
 
         # Save to history only after a successful response.
-        self._history = [*messages, {"role": "assistant", "content": response.content}]
+        self._history.extend([user_msg, {"role": "assistant", "content": response.content}])
 
         return agent_response
diff --git a/ddev/src/ddev/ai/agent/exceptions.py b/ddev/src/ddev/ai/agent/exceptions.py
new file mode 100644
index 0000000000000..d0d25d3665239
--- /dev/null
+++ b/ddev/src/ddev/ai/agent/exceptions.py
@@ -0,0 +1,29 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+
+
+class AgentError(Exception):
+    """Base class for all errors raised by AnthropicAgent; catch it to handle any agent failure."""
+
+    pass
+
+
+class AgentConnectionError(AgentError):
+    """Network failure — the API was unreachable (raised for anthropic.APIConnectionError)."""
+
+    pass
+
+
+class AgentRateLimitError(AgentError):
+    """Rate limit hit (anthropic.RateLimitError) — the request may be retried after a delay."""
+
+    pass
+
+
+class AgentAPIError(AgentError):
+    """The API returned an error status code, exposed as the `status_code` attribute."""
+
+    def __init__(self, status_code: int, message: str) -> None:
+        super().__init__(message)
+        self.status_code = status_code
diff --git a/ddev/src/ddev/ai/agent/types.py b/ddev/src/ddev/ai/agent/types.py
deleted file mode 100644
index 1c4b701b84ddb..0000000000000
--- a/ddev/src/ddev/ai/agent/types.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# (C) Datadog, Inc. 2026-present
-# All rights reserved
-# Licensed under a 3-clause BSD style license (see LICENSE)
-
-from dataclasses import dataclass
-from enum import StrEnum
-
-
-class StopReason(StrEnum):
-    """Maps Anthropic API stop_reason strings to a typed enum."""
-
-    END_TURN = "end_turn"
-    MAX_TOKENS = "max_tokens"
-    STOP_SEQUENCE = "stop_sequence"
-    TOOL_USE = "tool_use"
-    PAUSE_TURN = "pause_turn"
-    REFUSAL = "refusal"
-
-
-@dataclass(frozen=True)
-class ToolCall:
-    """A single tool invocation requested by the model."""
-
-    id: str
-    name: str
-    input: dict[str, object]
-
-
-@dataclass(frozen=True)
-class TokenUsage:
-    """Token accounting from a single API call."""
-
-    input_tokens: int
-    output_tokens: int
-    cache_read_input_tokens: int
-    cache_creation_input_tokens: int
-
-
-@dataclass(frozen=True)
-class AgentResponse:
-    """The complete response from a single AnthropicAgent.send() call."""
-
-    stop_reason: StopReason
-    text: str
-    tool_calls: list[ToolCall]
-    usage: TokenUsage
-
-
-class AgentError(Exception):
-    """Base class for all errors raised by AnthropicAgent."""
-
-    pass
-
-
-class AgentConnectionError(AgentError):
-    """Network failure — the API was unreachable."""
-
-    pass
-
-
-class AgentRateLimitError(AgentError):
-    """Rate limit hit — the request may be retried after a delay."""
-
-    pass
-
-
-class AgentAPIError(AgentError):
-    """The API returned an error status code."""
-
-    def __init__(self, status_code: int, message: str) -> None:
-        super().__init__(message)
-        self.status_code = status_code
diff --git a/ddev/tests/ai/agent/test_agent.py b/ddev/tests/ai/agent/test_agent.py
index 8372e2d00e519..0a261baf02679 100644
--- a/ddev/tests/ai/agent/test_agent.py
+++ b/ddev/tests/ai/agent/test_agent.py
@@ -9,13 +9,12 @@
 import anthropic
 import pytest
 
-from ddev.ai.agent.agent import AnthropicAgent
-from ddev.ai.agent.types import (
+from ddev.ai.agent.client import AnthropicAgent, StopReason
+from ddev.ai.agent.exceptions import (
     AgentAPIError,
     AgentConnectionError,
     AgentError,
     AgentRateLimitError,
-    StopReason,
 )
 from ddev.ai.tools.core.registry import ToolRegistry
 from ddev.ai.tools.core.types import ToolResult
@@ -156,7 +155,7 @@ def test_multiple_text_blocks_are_concatenated() -> None:
 
     result = asyncio.run(agent.send("Hi"))
 
-    assert result.text == "Hello, world!"
+    assert result.text == "Hello, \nworld!"
 
 
 # ---------------------------------------------------------------------------

From c9b34475136defa4b2f2116912782b3d5786e366 Mon Sep 17 00:00:00 2001
From: Luis Orofino <luis.orofino@datadoghq.com>
Date: Thu, 26 Mar 2026 16:00:21 +0100
Subject: [PATCH 4/5] Add ContextUsage and modify tools' allowed_callers

---
 ddev/pyproject.toml                           |  2 +-
 ddev/src/ddev/ai/agent/client.py              | 56 +++++++++++++++----
 ddev/src/ddev/ai/tools/core/registry.py       |  9 +--
 .../agent/{test_agent.py => test_client.py}   | 44 +++++++++++++++
 ddev/tests/ai/tools/core/test_registry.py     |  4 +-
 5 files changed, 93 insertions(+), 22 deletions(-)
 rename ddev/tests/ai/agent/{test_agent.py => test_client.py} (88%)

diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml
index 8cfbc07271cf7..3118006baa38d 100644
--- a/ddev/pyproject.toml
+++ b/ddev/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "anthropic>=0.18.0",
+    "anthropic>=0.86.0",
     "click~=8.1.6",
     "coverage",
     "datadog-api-client==2.20.0",
diff --git a/ddev/src/ddev/ai/agent/client.py b/ddev/src/ddev/ai/agent/client.py
index b0b3f8e874c05..6c1c6d44f70af 100644
--- a/ddev/src/ddev/ai/agent/client.py
+++ b/ddev/src/ddev/ai/agent/client.py
@@ -20,7 +20,8 @@
 )
 
 MODEL: Final[str] = "claude-sonnet-4-6"
-MAX_TOKENS: Final[int] = 8192
+MAX_TOKENS: Final[int] = 8192  # max tokens per response
+ALLOWED_TOOL_CALLERS: Final = ["code_execution_20260120"]
 
 
 class StopReason(StrEnum):
@@ -43,14 +44,31 @@ class ToolCall:
     input: dict[str, object]
 
 
+@dataclass(frozen=True)
+class ContextUsage:
+    """Context window accounting for a single API call; both fields are token counts."""
+
+    window_size: int  # model context window (the model's max_input_tokens)
+    used_tokens: int  # input + cache-read + cache-creation tokens of the call
+
+    @property
+    def context_pct(self) -> float:  # percent (0-100) of the window in use; 0.0 if window_size is 0
+        return self.used_tokens / self.window_size * 100 if self.window_size else 0.0
+
+    @property
+    def remaining_tokens(self) -> int:  # tokens still free in the window
+        return self.window_size - self.used_tokens
+
+
 @dataclass(frozen=True)
 class TokenUsage:
     """Token accounting from a single API call."""
 
-    input_tokens: int
-    output_tokens: int
-    cache_read_input_tokens: int
-    cache_creation_input_tokens: int
+    input_tokens: int  # input tokens outside the prompt cache (system_prompt + history)
+    output_tokens: int  # tokens the model generated
+    cache_read_input_tokens: int  # tokens read from prompt cache
+    cache_creation_input_tokens: int  # tokens written to prompt cache
+    context: ContextUsage  # context-window usage computed from the three input-side counts
 
 
 @dataclass(frozen=True)
@@ -72,6 +90,7 @@ def __init__(
         name: str,
         model: str = MODEL,
         max_tokens: int = MAX_TOKENS,
+        tool_execution: bool = False,
     ) -> None:
         self._client = client
         self._tools = tools
@@ -79,7 +98,9 @@ def __init__(
         self.name = name
         self._model = model
         self._max_tokens = max_tokens
+        self._tool_execution = tool_execution
         self._history: list[MessageParam] = []
+        self._context_window: int | None = None
 
     @property
     def history(self) -> list[MessageParam]:
@@ -90,12 +111,21 @@ def reset(self) -> None:
         """Clear conversation history to start a new conversation."""
         self._history = []
 
+    async def _get_context_window(self) -> int:  # context window of self._model, fetched once
+        if self._context_window is None:  # first call only; later calls reuse the cached value
+            info = await self._client.models.retrieve(self._model)
+            self._context_window = info.max_input_tokens
+        return self._context_window
+
     def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list[ToolParam]:
         """Filter tool definitions by allowlist. None means all tools."""
-        if allowed_tools is None:
-            return self._tools.definitions
-        allowed = set(allowed_tools)
-        return [d for d in self._tools.definitions if d["name"] in allowed]
+        definitions = self._tools.definitions
+        if allowed_tools is not None:
+            allowed = set(allowed_tools)
+            definitions = [d for d in definitions if d["name"] in allowed]
+        if not self._tool_execution:
+            definitions = [{**d, "allowed_callers": ALLOWED_TOOL_CALLERS} for d in definitions]
+        return definitions
 
     async def send(
         self,
@@ -144,11 +174,15 @@ async def send(
         # ThinkingBlock and RedactedThinkingBlock are intentionally ignored.
         # Extended thinking support can add a `thinking: str` field to AgentResponse later.
 
+        cache_read = response.usage.cache_read_input_tokens or 0
+        cache_creation = response.usage.cache_creation_input_tokens or 0
+        used_tokens = response.usage.input_tokens + cache_read + cache_creation
         usage = TokenUsage(
             input_tokens=response.usage.input_tokens,
             output_tokens=response.usage.output_tokens,
-            cache_read_input_tokens=response.usage.cache_read_input_tokens or 0,
-            cache_creation_input_tokens=response.usage.cache_creation_input_tokens or 0,
+            cache_read_input_tokens=cache_read,
+            cache_creation_input_tokens=cache_creation,
+            context=ContextUsage(window_size=await self._get_context_window(), used_tokens=used_tokens),
         )
 
         agent_response = AgentResponse(
diff --git a/ddev/src/ddev/ai/tools/core/registry.py b/ddev/src/ddev/ai/tools/core/registry.py
index 29c6f92fb8801..240e969a81843 100644
--- a/ddev/src/ddev/ai/tools/core/registry.py
+++ b/ddev/src/ddev/ai/tools/core/registry.py
@@ -2,15 +2,11 @@
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
 
-from typing import Final
-
 from anthropic.types import ToolParam
 
 from .protocol import ToolProtocol
 from .types import ToolResult
 
-ALLOWED_TOOL_CALLERS: Final = ["code_execution_20260120"]
-
 
 class ToolRegistry:
     """Registry holding all available tools."""
@@ -20,9 +16,8 @@ def __init__(self, tools: list[ToolProtocol]) -> None:
 
     @property
     def definitions(self) -> list[ToolParam]:
-        """Return Anthropic SDK tool definitions for all registered tools.
-        Each tool definition dict is not mutated, but a new dict is returned with the allowed_callers key added."""
-        return [{**tool.definition, "allowed_callers": ALLOWED_TOOL_CALLERS} for tool in self._tools.values()]
+        """Return Anthropic SDK tool definitions for all registered tools."""
+        return [tool.definition for tool in self._tools.values()]
 
     async def run(self, name: str, raw: dict[str, object]) -> ToolResult:
         """Execute a tool by name, returning an error result if not found."""
diff --git a/ddev/tests/ai/agent/test_agent.py b/ddev/tests/ai/agent/test_client.py
similarity index 88%
rename from ddev/tests/ai/agent/test_agent.py
rename to ddev/tests/ai/agent/test_client.py
index 0a261baf02679..e67a6ee8d0785 100644
--- a/ddev/tests/ai/agent/test_agent.py
+++ b/ddev/tests/ai/agent/test_client.py
@@ -67,6 +67,9 @@ def make_response(
     )
 
 
+FAKE_CONTEXT_WINDOW = 200_000
+
+
 def make_agent(
     tools: ToolRegistry | None = None,
     mock_response: SimpleNamespace | None = None,
@@ -74,6 +77,8 @@ def make_agent(
     client = MagicMock(spec=anthropic.AsyncAnthropic)
     client.messages = MagicMock()
     client.messages.create = AsyncMock(return_value=mock_response or make_response("end_turn", []))
+    client.models = MagicMock()
+    client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW))
     registry = tools or ToolRegistry([])
     agent = AnthropicAgent(
         client=client,
@@ -327,6 +332,41 @@ def test_cache_tokens_none_defaults_to_zero() -> None:
     assert result.usage.cache_creation_input_tokens == 0
 
 
+# ---------------------------------------------------------------------------
+# ContextUsage fields
+# ---------------------------------------------------------------------------
+
+
+def test_context_usage_fields() -> None:
+    usage = make_usage(input_tokens=1000, cache_read=500, cache_creation=200)
+    resp = make_response("end_turn", [make_text_block("ok")], usage=usage)
+    agent, _ = make_agent(mock_response=resp)
+
+    result = asyncio.run(agent.send("Hi"))
+
+    ctx = result.usage.context
+    assert ctx.window_size == FAKE_CONTEXT_WINDOW
+    assert ctx.used_tokens == 1700  # 1000 + 500 + 200
+    assert ctx.context_pct == pytest.approx(1700 / FAKE_CONTEXT_WINDOW * 100)
+    assert ctx.remaining_tokens == FAKE_CONTEXT_WINDOW - 1700
+
+
+# ---------------------------------------------------------------------------
+# context_window is fetched once and cached across multiple sends
+# ---------------------------------------------------------------------------
+
+
+def test_context_window_fetched_once() -> None:
+    resp = make_response("end_turn", [make_text_block("ok")])
+    agent, _ = make_agent(mock_response=resp)
+    agent._client.messages.create = AsyncMock(return_value=resp)
+
+    asyncio.run(agent.send("First"))
+    asyncio.run(agent.send("Second"))
+
+    agent._client.models.retrieve.assert_awaited_once()
+
+
 # ---------------------------------------------------------------------------
 # Multi-turn — send str then send tool results → history has 4 entries
 # ---------------------------------------------------------------------------
@@ -339,6 +379,8 @@ def test_multi_turn_history_grows_correctly() -> None:
     client = MagicMock(spec=anthropic.AsyncAnthropic)
     client.messages = MagicMock()
     client.messages.create = AsyncMock(side_effect=[tool_resp, text_resp])
+    client.models = MagicMock()
+    client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW))
     agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
 
     first = asyncio.run(agent.send("Do X"))
@@ -399,6 +441,8 @@ def test_error_mid_conversation_leaves_history_unchanged() -> None:
             anthropic.APIConnectionError(request=MagicMock()),
         ]
     )
+    client.models = MagicMock()
+    client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW))
     agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
 
     asyncio.run(agent.send("First message"))
diff --git a/ddev/tests/ai/tools/core/test_registry.py b/ddev/tests/ai/tools/core/test_registry.py
index fdd42714b6ed4..245b3f67490d1 100644
--- a/ddev/tests/ai/tools/core/test_registry.py
+++ b/ddev/tests/ai/tools/core/test_registry.py
@@ -5,7 +5,7 @@
 
 import pytest
 
-from ddev.ai.tools.core.registry import ALLOWED_TOOL_CALLERS, ToolRegistry
+from ddev.ai.tools.core.registry import ToolRegistry
 from ddev.ai.tools.core.types import ToolResult
 
 # ---------------------------------------------------------------------------
@@ -76,8 +76,6 @@ def test_empty_registry_returns_empty_list():
 def test_tool_registry_definitions_returns_all_tool_definitions():
     registry = ToolRegistry([FakeTool("a"), FakeTool("b")])
     assert len(registry.definitions) == 2
-    for defn in registry.definitions:
-        assert defn["allowed_callers"] == ALLOWED_TOOL_CALLERS
 
 
 def test_definition_contains_tool_name():

From 80fb3d2150e36ea220010397759c823b1ae97222 Mon Sep 17 00:00:00 2001
From: Luis Orofino <luis.orofino@datadoghq.com>
Date: Thu, 26 Mar 2026 17:29:07 +0100
Subject: [PATCH 5/5] Add docstrings and pytest-asyncio

---
 ddev/hatch.toml                            |  1 +
 ddev/pyproject.toml                        |  3 +
 ddev/src/ddev/ai/agent/client.py           | 41 +++++++---
 ddev/tests/ai/agent/test_client.py         | 87 +++++++++++-----------
 ddev/tests/ai/tools/core/test_base.py      | 13 ++--
 ddev/tests/ai/tools/core/test_registry.py  | 21 +++---
 ddev/tests/ai/tools/fs/conftest.py         |  5 +-
 ddev/tests/ai/tools/fs/test_append_file.py | 29 ++++----
 ddev/tests/ai/tools/fs/test_create_file.py | 29 ++++----
 ddev/tests/ai/tools/fs/test_edit_file.py   | 43 +++++------
 ddev/tests/ai/tools/fs/test_read_file.py   | 33 ++++----
 ddev/tests/ai/tools/fs/test_workflow.py    | 21 +++---
 ddev/tests/ai/tools/http/test_http_get.py  | 25 +++----
 ddev/tests/ai/tools/shell/test_base.py     | 48 ++++++------
 ddev/tests/ai/tools/shell/test_tools.py    |  5 +-
 15 files changed, 211 insertions(+), 193 deletions(-)

diff --git a/ddev/hatch.toml b/ddev/hatch.toml
index 8a07ebbf5c149..a5fa11668e95f 100644
--- a/ddev/hatch.toml
+++ b/ddev/hatch.toml
@@ -9,6 +9,7 @@ python = "3.13"
 e2e-env = false
 dependencies = [
   "pyyaml",
+  "pytest-asyncio",
   "vcrpy",
 ]
 # TODO: remove this when the old CLI is gone
diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml
index 3118006baa38d..33af6ea6cbbeb 100644
--- a/ddev/pyproject.toml
+++ b/ddev/pyproject.toml
@@ -136,3 +136,6 @@ ban-relative-imports = "parents"
 [tool.ruff.lint.per-file-ignores]
 #Tests can use assertions and relative imports
 "**/tests/**/*" = ["I252"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
diff --git a/ddev/src/ddev/ai/agent/client.py b/ddev/src/ddev/ai/agent/client.py
index 6c1c6d44f70af..d576429015cef 100644
--- a/ddev/src/ddev/ai/agent/client.py
+++ b/ddev/src/ddev/ai/agent/client.py
@@ -5,7 +5,7 @@
 from copy import deepcopy
 from dataclasses import dataclass
 from enum import StrEnum
-from typing import Final
+from typing import Any, Final
 
 import anthropic
 from anthropic.types import MessageParam, ToolParam, ToolResultBlockParam
@@ -19,8 +19,8 @@
     AgentRateLimitError,
 )
 
-MODEL: Final[str] = "claude-sonnet-4-6"
-MAX_TOKENS: Final[int] = 8192  # max tokens per response
+DEFAULT_MODEL: Final[str] = "claude-sonnet-4-6"
+DEFAULT_MAX_TOKENS: Final[int] = 8192  # max tokens per response
 ALLOWED_TOOL_CALLERS: Final = ["code_execution_20260120"]
 
 
@@ -41,7 +41,7 @@ class ToolCall:
 
     id: str
     name: str
-    input: dict[str, object]
+    input: dict[str, Any]
 
 
 @dataclass(frozen=True)
@@ -73,7 +73,8 @@ class TokenUsage:
 
 @dataclass(frozen=True)
 class AgentResponse:
-    """The complete response from a single AnthropicAgent.send() call."""
+    """The complete response from a single AnthropicAgent.send() call.
+    Adds useful metadata to the response of the Anthropic API."""
 
     stop_reason: StopReason
     text: str
@@ -82,23 +83,36 @@ class AgentResponse:
 
 
 class AnthropicAgent:
+    """A wrapper around the Anthropic API that provides a simple interface for interacting with agents."""
+
     def __init__(
         self,
         client: anthropic.AsyncAnthropic,
         tools: ToolRegistry,
         system_prompt: str,
         name: str,
-        model: str = MODEL,
-        max_tokens: int = MAX_TOKENS,
-        tool_execution: bool = False,
+        model: str = DEFAULT_MODEL,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
+        programmatic_tool_calling: bool = False,
     ) -> None:
+        """Initialize an AnthropicAgent.
+        Args:
+            client: The Anthropic client to use.
+            tools: The ToolRegistry to use (might not be used in every call if allowed_tools in send() is provided)
+            system_prompt: The system prompt to use.
+            name: The name of the agent.
+            model: The model to use.
+            max_tokens: The max tokens per response.
+            programmatic_tool_calling: Whether to allow programmatic tool calling.
+        """
+
         self._client = client
         self._tools = tools
         self._system_prompt = system_prompt
         self.name = name
         self._model = model
         self._max_tokens = max_tokens
-        self._tool_execution = tool_execution
+        self._programmatic_tool_calling = programmatic_tool_calling
         self._history: list[MessageParam] = []
         self._context_window: int | None = None
 
@@ -123,7 +137,7 @@ def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list[ToolPar
         if allowed_tools is not None:
             allowed = set(allowed_tools)
             definitions = [d for d in definitions if d["name"] in allowed]
-        if not self._tool_execution:
+        if not self._programmatic_tool_calling:  # NOTE(review): looks inverted vs. flag name; confirm
             definitions = [{**d, "allowed_callers": ALLOWED_TOOL_CALLERS} for d in definitions]
         return definitions
 
 
@@ -132,6 +146,13 @@ async def send(
         content: str | list[ToolResultBlockParam],
         allowed_tools: list[str] | None = None,
     ) -> AgentResponse:
+        """Send a message to the agent and return the response.
+        Args:
+            content: The content to send to the agent.
+            allowed_tools: The tools in the ToolRegistry to allow the agent to use.
+        Returns:
+            An AgentResponse object containing the response from the agent.
+        """
         tool_defs = self._get_tool_definitions(allowed_tools)
 
         user_msg: MessageParam = {"role": "user", "content": content}
diff --git a/ddev/tests/ai/agent/test_client.py b/ddev/tests/ai/agent/test_client.py
index e67a6ee8d0785..f4d1b9f5e8c96 100644
--- a/ddev/tests/ai/agent/test_client.py
+++ b/ddev/tests/ai/agent/test_client.py
@@ -2,7 +2,6 @@
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
 
-import asyncio
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
 
@@ -94,12 +93,12 @@ def make_agent(
 # ---------------------------------------------------------------------------
 
 
-def test_end_turn_single_text_block() -> None:
+async def test_end_turn_single_text_block() -> None:
     content = [make_text_block("Hello!")]
     resp = make_response("end_turn", content)
     agent, _ = make_agent(mock_response=resp)
 
-    result = asyncio.run(agent.send("Hi"))
+    result = await agent.send("Hi")
 
     assert result.stop_reason is StopReason.END_TURN
     assert result.text == "Hello!"
@@ -114,12 +113,12 @@ def test_end_turn_single_text_block() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_tool_use_single_block() -> None:
+async def test_tool_use_single_block() -> None:
     block = make_tool_use_block(id="toolu_42", name="read_file", input={"path": "/etc/hosts"})
     resp = make_response("tool_use", [block])
     agent, _ = make_agent(mock_response=resp)
 
-    result = asyncio.run(agent.send("Read hosts"))
+    result = await agent.send("Read hosts")
 
     assert result.stop_reason is StopReason.TOOL_USE
     assert len(result.tool_calls) == 1
@@ -134,7 +133,7 @@ def test_tool_use_single_block() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_mixed_text_and_tool_use() -> None:
+async def test_mixed_text_and_tool_use() -> None:
     content = [
         make_text_block("I'll read the file for you."),
         make_tool_use_block(id="toolu_01", name="read_file"),
@@ -142,7 +141,7 @@ def test_mixed_text_and_tool_use() -> None:
     resp = make_response("tool_use", content)
     agent, _ = make_agent(mock_response=resp)
 
-    result = asyncio.run(agent.send("Read a file"))
+    result = await agent.send("Read a file")
 
     assert result.text == "I'll read the file for you."
     assert len(result.tool_calls) == 1
@@ -153,12 +152,12 @@ def test_mixed_text_and_tool_use() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_multiple_text_blocks_are_concatenated() -> None:
+async def test_multiple_text_blocks_are_concatenated() -> None:
     content = [make_text_block("Hello, "), make_text_block("world!")]
     resp = make_response("end_turn", content)
     agent, _ = make_agent(mock_response=resp)
 
-    result = asyncio.run(agent.send("Hi"))
+    result = await agent.send("Hi")
 
     assert result.text == "Hello, \nworld!"
 
@@ -168,11 +167,11 @@ def test_multiple_text_blocks_are_concatenated() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_max_tokens_is_not_an_error() -> None:
+async def test_max_tokens_is_not_an_error() -> None:
     resp = make_response("max_tokens", [make_text_block("Truncated...")])
     agent, _ = make_agent(mock_response=resp)
 
-    result = asyncio.run(agent.send("Tell me everything"))
+    result = await agent.send("Tell me everything")
 
     assert result.stop_reason is StopReason.MAX_TOKENS
     assert len(agent.history) == 2
@@ -203,34 +202,34 @@ async def run(self, raw: dict) -> ToolResult:
         pass
 
 
-def test_allowed_tools_filters_to_subset() -> None:
+async def test_allowed_tools_filters_to_subset() -> None:
     registry = ToolRegistry([FakeTool(n) for n in ["read_file", "grep", "mkdir"]])
     resp = make_response("end_turn", [make_text_block("ok")])
     agent, create_mock = make_agent(tools=registry, mock_response=resp)
 
-    asyncio.run(agent.send("Hi", allowed_tools=["read_file"]))
+    await agent.send("Hi", allowed_tools=["read_file"])
 
     sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]]
     assert sent_names == ["read_file"]
 
 
-def test_allowed_tools_none_passes_all() -> None:
+async def test_allowed_tools_none_passes_all() -> None:
     registry = ToolRegistry([FakeTool(n) for n in ["a", "b"]])
     resp = make_response("end_turn", [make_text_block("ok")])
     agent, create_mock = make_agent(tools=registry, mock_response=resp)
 
-    asyncio.run(agent.send("Hi", allowed_tools=None))
+    await agent.send("Hi", allowed_tools=None)
 
     sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]]
     assert sent_names == ["a", "b"]
 
 
 @pytest.mark.parametrize("allowed_tools", [[], ["nonexistent_tool"]])
-def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None:
+async def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None:
     resp = make_response("end_turn", [make_text_block("ok")])
     agent, create_mock = make_agent(mock_response=resp)
 
-    asyncio.run(agent.send("Hi", allowed_tools=allowed_tools))
+    await agent.send("Hi", allowed_tools=allowed_tools)
 
     assert create_mock.call_args.kwargs["tools"] is anthropic.NOT_GIVEN
 
@@ -247,17 +246,17 @@ def _make_error_agent(side_effect: Exception) -> AnthropicAgent:
     return AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
 
 
-def test_connection_error_maps_to_agent_connection_error() -> None:
+async def test_connection_error_maps_to_agent_connection_error() -> None:
     agent = _make_error_agent(anthropic.APIConnectionError(request=MagicMock()))
 
     with pytest.raises(AgentConnectionError) as exc_info:
-        asyncio.run(agent.send("Hi"))
+        await agent.send("Hi")
 
     assert "Connection failed" in str(exc_info.value)
     assert agent.history == []
 
 
-def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None:
+async def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None:
     agent = _make_error_agent(
         anthropic.RateLimitError(
             message="rate limit",
@@ -267,13 +266,13 @@ def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None:
     )
 
     with pytest.raises(AgentRateLimitError) as exc_info:
-        asyncio.run(agent.send("Hi"))
+        await agent.send("Hi")
 
     assert "Rate limit exceeded" in str(exc_info.value)
     assert agent.history == []
 
 
-def test_api_status_error_maps_to_agent_api_error() -> None:
+async def test_api_status_error_maps_to_agent_api_error() -> None:
     agent = _make_error_agent(
         anthropic.APIStatusError(
             message="internal server error",
@@ -283,17 +282,17 @@ def test_api_status_error_maps_to_agent_api_error() -> None:
     )
 
     with pytest.raises(AgentAPIError) as exc_info:
-        asyncio.run(agent.send("Hi"))
+        await agent.send("Hi")
 
     assert exc_info.value.status_code == 500
     assert agent.history == []
 
 
-def test_response_validation_error_maps_to_agent_error() -> None:
+async def test_response_validation_error_maps_to_agent_error() -> None:
     agent = _make_error_agent(anthropic.APIResponseValidationError(response=MagicMock(), body=None))
 
     with pytest.raises(AgentError) as exc_info:
-        asyncio.run(agent.send("Hi"))
+        await agent.send("Hi")
 
     assert "Response validation failed" in str(exc_info.value)
     assert agent.history == []
@@ -304,12 +303,12 @@ def test_response_validation_error_maps_to_agent_error() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_unknown_stop_reason_raises_agent_error() -> None:
+async def test_unknown_stop_reason_raises_agent_error() -> None:
     resp = make_response("totally_unknown_reason", [])
     agent, _ = make_agent(mock_response=resp)
 
     with pytest.raises(AgentError) as exc_info:
-        asyncio.run(agent.send("Hi"))
+        await agent.send("Hi")
 
     assert agent.history == []
     assert "Unknown stop_reason" in str(exc_info.value)
@@ -321,12 +320,12 @@ def test_unknown_stop_reason_raises_agent_error() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_cache_tokens_none_defaults_to_zero() -> None:
+async def test_cache_tokens_none_defaults_to_zero() -> None:
     usage = make_usage(cache_read=None, cache_creation=None)
     resp = make_response("end_turn", [make_text_block("ok")], usage=usage)
     agent, _ = make_agent(mock_response=resp)
 
-    result = asyncio.run(agent.send("Hi"))
+    result = await agent.send("Hi")
 
     assert result.usage.cache_read_input_tokens == 0
     assert result.usage.cache_creation_input_tokens == 0
@@ -337,12 +336,12 @@ def test_cache_tokens_none_defaults_to_zero() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_context_usage_fields() -> None:
+async def test_context_usage_fields() -> None:
     usage = make_usage(input_tokens=1000, cache_read=500, cache_creation=200)
     resp = make_response("end_turn", [make_text_block("ok")], usage=usage)
     agent, _ = make_agent(mock_response=resp)
 
-    result = asyncio.run(agent.send("Hi"))
+    result = await agent.send("Hi")
 
     ctx = result.usage.context
     assert ctx.window_size == FAKE_CONTEXT_WINDOW
@@ -356,13 +355,13 @@ def test_context_usage_fields() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_context_window_fetched_once() -> None:
+async def test_context_window_fetched_once() -> None:
     resp = make_response("end_turn", [make_text_block("ok")])
     agent, _ = make_agent(mock_response=resp)
     agent._client.messages.create = AsyncMock(return_value=resp)
 
-    asyncio.run(agent.send("First"))
-    asyncio.run(agent.send("Second"))
+    await agent.send("First")
+    await agent.send("Second")
 
     agent._client.models.retrieve.assert_awaited_once()
 
@@ -372,7 +371,7 @@ def test_context_window_fetched_once() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_multi_turn_history_grows_correctly() -> None:
+async def test_multi_turn_history_grows_correctly() -> None:
     tool_resp = make_response("tool_use", [make_tool_use_block(id="toolu_01")])
     text_resp = make_response("end_turn", [make_text_block("Done.")])
 
@@ -383,12 +382,12 @@ def test_multi_turn_history_grows_correctly() -> None:
     client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW))
     agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
 
-    first = asyncio.run(agent.send("Do X"))
+    first = await agent.send("Do X")
     assert first.stop_reason is StopReason.TOOL_USE
     assert len(agent.history) == 2
 
     tool_results = [{"type": "tool_result", "tool_use_id": "toolu_01", "content": "result"}]
-    second = asyncio.run(agent.send(tool_results))
+    second = await agent.send(tool_results)
     assert second.stop_reason is StopReason.END_TURN
     assert len(agent.history) == 4
     assert agent.history[2]["role"] == "user"
@@ -400,10 +399,10 @@ def test_multi_turn_history_grows_correctly() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_history_property_returns_copy() -> None:
+async def test_history_property_returns_copy() -> None:
     resp = make_response("end_turn", [make_text_block("ok")])
     agent, _ = make_agent(mock_response=resp)
-    asyncio.run(agent.send("Hi"))
+    await agent.send("Hi")
 
     snapshot = agent.history
     snapshot.clear()
@@ -416,10 +415,10 @@ def test_history_property_returns_copy() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_reset_clears_history() -> None:
+async def test_reset_clears_history() -> None:
     resp = make_response("end_turn", [make_text_block("ok")])
     agent, _ = make_agent(mock_response=resp)
-    asyncio.run(agent.send("Hi"))
+    await agent.send("Hi")
     assert len(agent.history) == 2
 
     agent.reset()
@@ -431,7 +430,7 @@ def test_reset_clears_history() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_error_mid_conversation_leaves_history_unchanged() -> None:
+async def test_error_mid_conversation_leaves_history_unchanged() -> None:
     ok_resp = make_response("end_turn", [make_text_block("ok")])
     client = MagicMock(spec=anthropic.AsyncAnthropic)
     client.messages = MagicMock()
@@ -445,10 +444,10 @@ def test_error_mid_conversation_leaves_history_unchanged() -> None:
     client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW))
     agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t")
 
-    asyncio.run(agent.send("First message"))
+    await agent.send("First message")
     history_after_first = agent.history[:]
 
     with pytest.raises(AgentConnectionError):
-        asyncio.run(agent.send("Second message"))
+        await agent.send("Second message")
 
     assert agent.history == history_after_first
diff --git a/ddev/tests/ai/tools/core/test_base.py b/ddev/tests/ai/tools/core/test_base.py
index 96cd0f8b07d0c..35e94f750a69e 100644
--- a/ddev/tests/ai/tools/core/test_base.py
+++ b/ddev/tests/ai/tools/core/test_base.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 from typing import Annotated
 
 import pytest
@@ -194,8 +193,8 @@ async def __call__(self, tool_input: SimpleInput) -> ToolResult:
 # --- run(): happy path ---
 
 
-def test_run_valid_input_returns_success(echo_tool: EchoTool):
-    result = asyncio.run(echo_tool.run({"message": "hello"}))
+async def test_run_valid_input_returns_success(echo_tool: EchoTool):
+    result = await echo_tool.run({"message": "hello"})
     assert result.success is True
     assert result.data == "hello"
 
@@ -210,8 +209,8 @@ def test_run_valid_input_returns_success(echo_tool: EchoTool):
         {"message": "hi", "extra": "oops"},
     ],
 )
-def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict):
-    result = asyncio.run(echo_tool.run(raw))
+async def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict):
+    result = await echo_tool.run(raw)
     assert result.success is False
     assert result.error is not None
 
@@ -219,8 +218,8 @@ def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict):
 # --- run(): __call__ exception handling ---
 
 
-def test_run_captures_exception_from_call(failing_tool: FailingTool):
-    result = asyncio.run(failing_tool.run({"message": "boom"}))
+async def test_run_captures_exception_from_call(failing_tool: FailingTool):
+    result = await failing_tool.run({"message": "boom"})
     assert isinstance(result, ToolResult)
     assert result.success is False
     assert "RuntimeError" in result.error
diff --git a/ddev/tests/ai/tools/core/test_registry.py b/ddev/tests/ai/tools/core/test_registry.py
index 245b3f67490d1..1366a9d8b5be8 100644
--- a/ddev/tests/ai/tools/core/test_registry.py
+++ b/ddev/tests/ai/tools/core/test_registry.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 
 import pytest
 
@@ -88,41 +87,41 @@ def test_definition_contains_tool_name():
 # ---------------------------------------------------------------------------
 
 
-def test_run_dispatches_to_correct_tool():
+async def test_run_dispatches_to_correct_tool():
     tool_a = FakeTool("a", ToolResult(success=True, data="from a"))
     tool_b = FakeTool("b", ToolResult(success=True, data="from b"))
     registry = ToolRegistry([tool_a, tool_b])
 
-    result = asyncio.run(registry.run("b", {}))
+    result = await registry.run("b", {})
     assert result.success is True
     assert result.data == "from b"
 
 
-def test_passes_raw_dict_to_tool_unchanged():
+async def test_passes_raw_dict_to_tool_unchanged():
     tool = FakeTool("t")
     registry = ToolRegistry([tool])
     raw = {"key": "value", "num": 42}
 
-    asyncio.run(registry.run("t", raw))
+    await registry.run("t", raw)
     assert tool.last_raw == raw
 
 
-def test_returns_tool_result_on_tool_failure():
+async def test_returns_tool_result_on_tool_failure():
     registry = ToolRegistry([FakeTool("t", ToolResult(success=False, error="bad input"))])
-    result = asyncio.run(registry.run("t", {}))
+    result = await registry.run("t", {})
     assert result.success is False
     assert result.error == "bad input"
 
 
-def test_unknown_tool_returns_failure():
+async def test_unknown_tool_returns_failure():
     registry = ToolRegistry([FakeTool("known_tool")])
-    result = asyncio.run(registry.run("unknown_tool", {}))
+    result = await registry.run("unknown_tool", {})
     assert result.success is False
     assert "Unknown tool: 'unknown_tool'" in result.error
 
 
-def test_empty_registry_always_returns_unknown_error():
+async def test_empty_registry_always_returns_unknown_error():
     registry = ToolRegistry([])
-    result = asyncio.run(registry.run("anything", {}))
+    result = await registry.run("anything", {})
     assert result.success is False
     assert result.error is not None
diff --git a/ddev/tests/ai/tools/fs/conftest.py b/ddev/tests/ai/tools/fs/conftest.py
index 8d6677b98c398..12ae9e34eb1d5 100644
--- a/ddev/tests/ai/tools/fs/conftest.py
+++ b/ddev/tests/ai/tools/fs/conftest.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 
 import pytest
 
@@ -38,8 +37,8 @@ def append_tool(registry: FileRegistry) -> AppendFileTool:
 
 
 @pytest.fixture
-def known_file(tmp_path, create_tool: CreateFileTool):
+async def known_file(tmp_path, create_tool: CreateFileTool):
     """A temp file registered in the registry via create."""
     f = tmp_path / "file.txt"
-    asyncio.run(create_tool.run({"path": str(f), "content": "line one\nline two\nline three\n"}))
+    await create_tool.run({"path": str(f), "content": "line one\nline two\nline three\n"})
     return f
diff --git a/ddev/tests/ai/tools/fs/test_append_file.py b/ddev/tests/ai/tools/fs/test_append_file.py
index 2b669572d30bb..289142e378191 100644
--- a/ddev/tests/ai/tools/fs/test_append_file.py
+++ b/ddev/tests/ai/tools/fs/test_append_file.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 from unittest.mock import patch
 
 import pytest
@@ -23,8 +22,10 @@ def test_tool_name(registry: FileRegistry) -> None:
         ("A\r\nB\r\n", "A\nB\n", "\r"),
     ],
 )
-def test_append_file_success(append_tool: AppendFileTool, known_file, content, expected_in, expected_not_in) -> None:
-    result = asyncio.run(append_tool.run({"path": str(known_file), "content": content}))
+async def test_append_file_success(
+    append_tool: AppendFileTool, known_file, content, expected_in, expected_not_in
+) -> None:
+    result = await append_tool.run({"path": str(known_file), "content": content})
 
     assert result.success is True
     text = known_file.read_text(encoding="utf-8")
@@ -33,11 +34,11 @@ def test_append_file_success(append_tool: AppendFileTool, known_file, content, e
         assert expected_not_in not in text
 
 
-def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tmp_path) -> None:
+async def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tmp_path) -> None:
     f = tmp_path / "unread.txt"
     f.write_text("content", encoding="utf-8")
 
-    result = asyncio.run(append_tool.run({"path": str(f), "content": "more"}))
+    result = await append_tool.run({"path": str(f), "content": "more"})
 
     assert result.success is False
     assert "Not authorized" in result.error
@@ -50,39 +51,39 @@ def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tm
         ("", "first line", "first line"),
     ],
 )
-def test_append_file_separator(
+async def test_append_file_separator(
     append_tool: AppendFileTool, create_tool: CreateFileTool, tmp_path, initial, appended, expected
 ) -> None:
     f = tmp_path / "file.txt"
-    asyncio.run(create_tool.run({"path": str(f), "content": initial}))
+    await create_tool.run({"path": str(f), "content": initial})
 
-    result = asyncio.run(append_tool.run({"path": str(f), "content": appended}))
+    result = await append_tool.run({"path": str(f), "content": appended})
 
     assert result.success is True
     assert f.read_text(encoding="utf-8") == expected
 
 
-def test_append_file_fails_if_file_changed_externally(append_tool: AppendFileTool, known_file) -> None:
+async def test_append_file_fails_if_file_changed_externally(append_tool: AppendFileTool, known_file) -> None:
     known_file.write_text("externally modified\n", encoding="utf-8")
 
-    result = asyncio.run(append_tool.run({"path": str(known_file), "content": "more"}))
+    result = await append_tool.run({"path": str(known_file), "content": "more"})
 
     assert result.success is False
     assert "Re-read and retry" in result.error
 
 
-def test_append_file_updates_registry(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None:
-    asyncio.run(append_tool.run({"path": str(known_file), "content": "extra\n"}))
+async def test_append_file_updates_registry(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None:
+    await append_tool.run({"path": str(known_file), "content": "extra\n"})
 
     new_content = known_file.read_text(encoding="utf-8")
     assert registry.verify(str(known_file), new_content) is True
 
 
-def test_append_file_oserror_on_write(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None:
+async def test_append_file_oserror_on_write(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None:
     original_content = known_file.read_text(encoding="utf-8")
 
     with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")):
-        result = asyncio.run(append_tool.run({"path": str(known_file), "content": "new line"}))
+        result = await append_tool.run({"path": str(known_file), "content": "new line"})
 
     assert result.success is False
     assert result.error is not None
diff --git a/ddev/tests/ai/tools/fs/test_create_file.py b/ddev/tests/ai/tools/fs/test_create_file.py
index 2714ef5bb06aa..8b0c0296fa38a 100644
--- a/ddev/tests/ai/tools/fs/test_create_file.py
+++ b/ddev/tests/ai/tools/fs/test_create_file.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 from unittest.mock import patch
 
 from ddev.ai.tools.fs.create_file import CreateFileTool
@@ -12,41 +11,41 @@ def test_tool_name(registry: FileRegistry) -> None:
     assert CreateFileTool(registry).name == "create_file"
 
 
-def test_create_file_success(create_tool: CreateFileTool, tmp_path) -> None:
+async def test_create_file_success(create_tool: CreateFileTool, tmp_path) -> None:
     f = tmp_path / "new.txt"
 
-    result = asyncio.run(create_tool.run({"path": str(f), "content": "hello"}))
+    result = await create_tool.run({"path": str(f), "content": "hello"})
 
     assert result.success is True
     assert f.read_text(encoding="utf-8") == "hello"
 
 
-def test_create_file_default_empty_content(create_tool: CreateFileTool, tmp_path) -> None:
+async def test_create_file_default_empty_content(create_tool: CreateFileTool, tmp_path) -> None:
     f = tmp_path / "empty.txt"
 
-    result = asyncio.run(create_tool.run({"path": str(f)}))
+    result = await create_tool.run({"path": str(f)})
 
     assert result.success is True
     assert f.read_text(encoding="utf-8") == ""
 
 
-def test_create_file_creates_missing_parent_dirs(create_tool: CreateFileTool, tmp_path) -> None:
+async def test_create_file_creates_missing_parent_dirs(create_tool: CreateFileTool, tmp_path) -> None:
     f = tmp_path / "a" / "b" / "c" / "file.txt"
 
-    result = asyncio.run(create_tool.run({"path": str(f), "content": "nested"}))
+    result = await create_tool.run({"path": str(f), "content": "nested"})
 
     assert result.success is True
     assert f.exists()
     assert f.read_text(encoding="utf-8") == "nested"
 
 
-def test_create_file_fails_if_file_already_exists(
+async def test_create_file_fails_if_file_already_exists(
     create_tool: CreateFileTool, registry: FileRegistry, tmp_path
 ) -> None:
     f = tmp_path / "existing.txt"
     f.write_text("original", encoding="utf-8")
 
-    result = asyncio.run(create_tool.run({"path": str(f), "content": "new"}))
+    result = await create_tool.run({"path": str(f), "content": "new"})
 
     assert result.success is False
     assert result.error is not None
@@ -54,19 +53,19 @@ def test_create_file_fails_if_file_already_exists(
     assert not registry.is_known(str(f))
 
 
-def test_create_tool_registers_in_registry(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None:
+async def test_create_tool_registers_in_registry(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None:
     f = tmp_path / "file.txt"
-    asyncio.run(create_tool.run({"path": str(f), "content": "hi"}))
+    await create_tool.run({"path": str(f), "content": "hi"})
 
     assert registry.is_known(str(f)) is True
     assert registry.verify(str(f), "hi") is True
 
 
-def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None:
+async def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None:
     f = tmp_path / "a" / "b" / "new.txt"
 
     with patch("pathlib.Path.mkdir", side_effect=PermissionError("permission denied")):
-        result = asyncio.run(create_tool.run({"path": str(f), "content": "hi"}))
+        result = await create_tool.run({"path": str(f), "content": "hi"})
 
     assert result.success is False
     assert result.error is not None
@@ -74,11 +73,11 @@ def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: Fil
     assert not registry.is_known(str(f))
 
 
-def test_create_file_oserror_on_write(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None:
+async def test_create_file_oserror_on_write(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None:
     f = tmp_path / "new.txt"
 
     with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")):
-        result = asyncio.run(create_tool.run({"path": str(f), "content": "hi"}))
+        result = await create_tool.run({"path": str(f), "content": "hi"})
 
     assert result.success is False
     assert result.error is not None
diff --git a/ddev/tests/ai/tools/fs/test_edit_file.py b/ddev/tests/ai/tools/fs/test_edit_file.py
index cbfd48a78c193..27c8b87cedce2 100644
--- a/ddev/tests/ai/tools/fs/test_edit_file.py
+++ b/ddev/tests/ai/tools/fs/test_edit_file.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 from unittest.mock import patch
 
 import pytest
@@ -15,8 +14,8 @@ def test_tool_name(registry: FileRegistry) -> None:
     assert EditFileTool(registry).name == "edit_file"
 
 
-def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None:
-    result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line two", "new_string": "line TWO"}))
+async def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None:
+    result = await edit_tool.run({"path": str(known_file), "old_string": "line two", "new_string": "line TWO"})
 
     assert result.success is True
     content = known_file.read_text(encoding="utf-8")
@@ -24,54 +23,56 @@ def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None:
     assert "line two" not in content
 
 
-def test_edit_file_deletes_line(edit_tool: EditFileTool, known_file) -> None:
-    result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line two\n", "new_string": ""}))
+async def test_edit_file_deletes_line(edit_tool: EditFileTool, known_file) -> None:
+    result = await edit_tool.run({"path": str(known_file), "old_string": "line two\n", "new_string": ""})
 
     assert result.success is True
     assert "line two" not in known_file.read_text(encoding="utf-8")
 
 
-def test_edit_file_fails_for_unregistered_file(edit_tool: EditFileTool, tmp_path) -> None:
+async def test_edit_file_fails_for_unregistered_file(edit_tool: EditFileTool, tmp_path) -> None:
     f = tmp_path / "unread.txt"
     f.write_text("content", encoding="utf-8")
 
-    result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "content", "new_string": "new"}))
+    result = await edit_tool.run({"path": str(f), "old_string": "content", "new_string": "new"})
 
     assert result.success is False
     assert "Not authorized" in result.error
 
 
 @pytest.mark.parametrize("old_string", ["does not exist", ""])
-def test_edit_file_fails_if_old_string_not_found_or_empty(edit_tool: EditFileTool, known_file, old_string) -> None:
-    result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": old_string, "new_string": "x"}))
+async def test_edit_file_fails_if_old_string_not_found_or_empty(
+    edit_tool: EditFileTool, known_file, old_string
+) -> None:
+    result = await edit_tool.run({"path": str(known_file), "old_string": old_string, "new_string": "x"})
 
     assert result.success is False
 
 
-def test_edit_file_fails_if_old_string_ambiguous(
+async def test_edit_file_fails_if_old_string_ambiguous(
     edit_tool: EditFileTool, create_tool: CreateFileTool, tmp_path
 ) -> None:
     f = tmp_path / "dup.txt"
-    asyncio.run(create_tool.run({"path": str(f), "content": "foo\nfoo\nfoo\n"}))
+    await create_tool.run({"path": str(f), "content": "foo\nfoo\nfoo\n"})
 
-    result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "foo", "new_string": "bar"}))
+    result = await edit_tool.run({"path": str(f), "old_string": "foo", "new_string": "bar"})
 
     assert result.success is False
     assert "3" in result.error
     assert result.hint is not None
 
 
-def test_edit_file_fails_if_file_changed_externally(edit_tool: EditFileTool, known_file) -> None:
+async def test_edit_file_fails_if_file_changed_externally(edit_tool: EditFileTool, known_file) -> None:
     known_file.write_text("externally modified\n", encoding="utf-8")
 
-    result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"}))
+    result = await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"})
 
     assert result.success is False
     assert "Re-read and retry" in result.error
 
 
-def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None:
-    asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "LINE ONE"}))
+async def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None:
+    await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "LINE ONE"})
 
     new_content = known_file.read_text(encoding="utf-8")
     assert registry.verify(str(known_file), new_content) is True
@@ -85,23 +86,23 @@ def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegis
         ("line one\n", "line one", "A\r\nB", "A\nB\n"),  # CRLF in new_string
     ],
 )
-def test_edit_file_normalizes_crlf(
+async def test_edit_file_normalizes_crlf(
     edit_tool: EditFileTool, create_tool: CreateFileTool, tmp_path, file_content, old_string, new_string, expected
 ) -> None:
     f = tmp_path / "file.txt"
-    asyncio.run(create_tool.run({"path": str(f), "content": file_content}))
+    await create_tool.run({"path": str(f), "content": file_content})
 
-    result = asyncio.run(edit_tool.run({"path": str(f), "old_string": old_string, "new_string": new_string}))
+    result = await edit_tool.run({"path": str(f), "old_string": old_string, "new_string": new_string})
 
     assert result.success is True
     assert f.read_text(encoding="utf-8") == expected
 
 
-def test_edit_file_oserror_on_write(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None:
+async def test_edit_file_oserror_on_write(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None:
     original_content = known_file.read_text(encoding="utf-8")
 
     with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")):
-        result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"}))
+        result = await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"})
 
     assert result.success is False
     assert result.error is not None
diff --git a/ddev/tests/ai/tools/fs/test_read_file.py b/ddev/tests/ai/tools/fs/test_read_file.py
index f1b8da06d91ed..f2497e6c09a18 100644
--- a/ddev/tests/ai/tools/fs/test_read_file.py
+++ b/ddev/tests/ai/tools/fs/test_read_file.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 from unittest.mock import patch
 
 import pytest
@@ -14,47 +13,47 @@ def test_tool_name(registry: FileRegistry) -> None:
     assert ReadFileTool(registry).name == "read_file"
 
 
-def test_read_file_success(read_tool: ReadFileTool, tmp_path) -> None:
+async def test_read_file_success(read_tool: ReadFileTool, tmp_path) -> None:
     f = tmp_path / "config.txt"
     f.write_text("hello\nworld\n", encoding="utf-8")
 
-    result = asyncio.run(read_tool.run({"path": str(f)}))
+    result = await read_tool.run({"path": str(f)})
 
     assert result.success is True
     assert result.data == "0: hello\n1: world\n"
 
 
-def test_read_registers_unknown_file(read_tool: ReadFileTool, registry: FileRegistry, tmp_path) -> None:
+async def test_read_registers_unknown_file(read_tool: ReadFileTool, registry: FileRegistry, tmp_path) -> None:
     f = tmp_path / "file.txt"
     f.write_text("content", encoding="utf-8")
-    asyncio.run(read_tool.run({"path": str(f)}))
+    await read_tool.run({"path": str(f)})
 
     assert registry.is_known(str(f)) is True
 
 
-def test_read_file_missing_file(read_tool: ReadFileTool, tmp_path) -> None:
-    result = asyncio.run(read_tool.run({"path": str(tmp_path / "ghost.txt")}))
+async def test_read_file_missing_file(read_tool: ReadFileTool, tmp_path) -> None:
+    result = await read_tool.run({"path": str(tmp_path / "ghost.txt")})
 
     assert result.success is False
     assert str(tmp_path / "ghost.txt") in result.error
 
 
-def test_read_file_permission_error(read_tool: ReadFileTool, tmp_path) -> None:
+async def test_read_file_permission_error(read_tool: ReadFileTool, tmp_path) -> None:
     f = tmp_path / "secret.txt"
     f.write_text("secret", encoding="utf-8")
 
     with patch("pathlib.Path.read_text", side_effect=PermissionError("permission denied")):
-        result = asyncio.run(read_tool.run({"path": str(f)}))
+        result = await read_tool.run({"path": str(f)})
 
     assert result.success is False
     assert result.error is not None
 
 
-def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None:
+async def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None:
     f = tmp_path / "binary.bin"
     f.write_bytes(b"\xff\xfe\x00binary")
 
-    result = asyncio.run(read_tool.run({"path": str(f)}))
+    result = await read_tool.run({"path": str(f)})
 
     assert result.success is False
     assert result.error is not None
@@ -71,23 +70,23 @@ def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None:
         (100, None, ""),  # offset beyond EOF
     ],
 )
-def test_read_file_with_offset_and_limit(read_tool: ReadFileTool, tmp_path, offset, limit, expected) -> None:
+async def test_read_file_with_offset_and_limit(read_tool: ReadFileTool, tmp_path, offset, limit, expected) -> None:
     f = tmp_path / "file.txt"
     f.write_text("a\nb\nc\n", encoding="utf-8")
 
-    result = asyncio.run(read_tool.run({"path": str(f), "offset": offset, "limit": limit}))
+    result = await read_tool.run({"path": str(f), "offset": offset, "limit": limit})
 
     assert result.success is True
     assert result.data == expected
 
 
-def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None:
+async def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None:
     from ddev.ai.tools.core.truncation import MAX_CHARS
 
     f = tmp_path / "large.txt"
     f.write_text("x" * (MAX_CHARS + 1000), encoding="utf-8")
 
-    result = asyncio.run(read_tool.run({"path": str(f)}))
+    result = await read_tool.run({"path": str(f)})
 
     assert result.success is True
     assert result.truncated is True
@@ -95,11 +94,11 @@ def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None:
     assert result.hint is not None
 
 
-def test_read_file_no_trailing_newline(read_tool: ReadFileTool, tmp_path) -> None:
+async def test_read_file_no_trailing_newline(read_tool: ReadFileTool, tmp_path) -> None:
     f = tmp_path / "file.txt"
     f.write_text("no newline at end", encoding="utf-8")
 
-    result = asyncio.run(read_tool.run({"path": str(f)}))
+    result = await read_tool.run({"path": str(f)})
 
     assert result.success is True
     assert result.data == "0: no newline at end"
diff --git a/ddev/tests/ai/tools/fs/test_workflow.py b/ddev/tests/ai/tools/fs/test_workflow.py
index 077f63189bf91..a45ad9d937e26 100644
--- a/ddev/tests/ai/tools/fs/test_workflow.py
+++ b/ddev/tests/ai/tools/fs/test_workflow.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 
 from ddev.ai.tools.fs.append_file import AppendFileTool
 from ddev.ai.tools.fs.create_file import CreateFileTool
@@ -10,7 +9,7 @@
 from ddev.ai.tools.fs.read_file import ReadFileTool
 
 
-def test_workflow_create_read_edit_append(
+async def test_workflow_create_read_edit_append(
     create_tool: CreateFileTool,
     read_tool: ReadFileTool,
     edit_tool: EditFileTool,
@@ -21,20 +20,20 @@ def test_workflow_create_read_edit_append(
     f = tmp_path / "workflow.txt"
 
     # Step 1: create
-    r = asyncio.run(create_tool.run({"path": str(f), "content": "version: 1\n"}))
+    r = await create_tool.run({"path": str(f), "content": "version: 1\n"})
     assert r.success is True
 
     # Step 2: read (registers current content)
-    r = asyncio.run(read_tool.run({"path": str(f)}))
+    r = await read_tool.run({"path": str(f)})
     assert r.success is True
 
     # Step 3: edit
-    r = asyncio.run(edit_tool.run({"path": str(f), "old_string": "version: 1", "new_string": "version: 2"}))
+    r = await edit_tool.run({"path": str(f), "old_string": "version: 1", "new_string": "version: 2"})
     assert r.success is True
     assert "version: 2" in f.read_text(encoding="utf-8")
 
     # Step 4: append
-    r = asyncio.run(append_tool.run({"path": str(f), "content": "# updated\n"}))
+    r = await append_tool.run({"path": str(f), "content": "# updated\n"})
     assert r.success is True
     assert f.read_text(encoding="utf-8").endswith("# updated\n")
 
@@ -42,22 +41,22 @@ def test_workflow_create_read_edit_append(
     assert registry.verify(str(f), f.read_text(encoding="utf-8")) is True
 
 
-def test_workflow_stale_file(
+async def test_workflow_stale_file(
     create_tool: CreateFileTool,
     read_tool: ReadFileTool,
     edit_tool: EditFileTool,
     tmp_path,
 ) -> None:
     f = tmp_path / "shared.txt"
-    asyncio.run(create_tool.run({"path": str(f), "content": "original\n"}))
+    await create_tool.run({"path": str(f), "content": "original\n"})
     f.write_text("updated externally\n", encoding="utf-8")
 
-    result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "original", "new_string": "my edit"}))
+    result = await edit_tool.run({"path": str(f), "old_string": "original", "new_string": "my edit"})
     assert result.success is False
     assert "Re-read and retry" in result.error
 
-    asyncio.run(read_tool.run({"path": str(f)}))
+    await read_tool.run({"path": str(f)})
 
-    result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "updated externally", "new_string": "final"}))
+    result = await edit_tool.run({"path": str(f), "old_string": "updated externally", "new_string": "final"})
     assert result.success is True
     assert f.read_text(encoding="utf-8") == "final\n"
diff --git a/ddev/tests/ai/tools/http/test_http_get.py b/ddev/tests/ai/tools/http/test_http_get.py
index d2e8c06220fa1..2cb871bdfd62a 100644
--- a/ddev/tests/ai/tools/http/test_http_get.py
+++ b/ddev/tests/ai/tools/http/test_http_get.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import httpx
@@ -51,8 +50,8 @@ def test_tool_meta(http_tool: HttpGetTool) -> None:
 
 
 @pytest.mark.parametrize("url", ["ftp://example.com", "example.com", "", "//example.com"])
-def test_invalid_url(http_tool: HttpGetTool, url: str) -> None:
-    result = asyncio.run(http_tool.run({"url": url}))
+async def test_invalid_url(http_tool: HttpGetTool, url: str) -> None:
+    result = await http_tool.run({"url": url})
 
     assert result.success is False
     assert "http" in result.error and "https" in result.error
@@ -71,9 +70,9 @@ def test_invalid_url(http_tool: HttpGetTool, url: str) -> None:
         (204, ""),
     ],
 )
-def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> None:
+async def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> None:
     with patch_httpx(fake_response(status_code, body)):
-        result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"}))
+        result = await http_tool.run({"url": "http://localhost:9090/metrics"})
 
     assert result.success is True
     assert f"Status: {status_code}" in result.data
@@ -81,9 +80,9 @@ def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) ->
 
 
 @pytest.mark.parametrize("status_code", [400, 404, 500, 503])
-def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> None:
+async def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> None:
     with patch_httpx(fake_response(status_code, "error body")):
-        result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"}))
+        result = await http_tool.run({"url": "http://localhost:9090/metrics"})
 
     assert result.success is True
     assert f"Status: {status_code}" in result.data
@@ -94,17 +93,17 @@ def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) ->
 # ---------------------------------------------------------------------------
 
 
-def test_request_timeout(http_tool: HttpGetTool) -> None:
+async def test_request_timeout(http_tool: HttpGetTool) -> None:
     with patch_httpx(side_effect=httpx.TimeoutException("timed out")):
-        result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics", "timeout": 1.0}))
+        result = await http_tool.run({"url": "http://localhost:9090/metrics", "timeout": 1.0})
 
     assert result.success is False
     assert "timed out after 1.0s" in result.error
 
 
-def test_request_error(http_tool: HttpGetTool) -> None:
+async def test_request_error(http_tool: HttpGetTool) -> None:
     with patch_httpx(side_effect=httpx.RequestError("connection refused")):
-        result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"}))
+        result = await http_tool.run({"url": "http://localhost:9090/metrics"})
 
     assert result.success is False
     assert "Request failed" in result.error
@@ -116,12 +115,12 @@ def test_request_error(http_tool: HttpGetTool) -> None:
 
 
 @pytest.mark.parametrize("status_code", [200, 500])
-def test_response_truncated(http_tool: HttpGetTool, status_code: int) -> None:
+async def test_response_truncated(http_tool: HttpGetTool, status_code: int) -> None:
     from ddev.ai.tools.core.truncation import MAX_CHARS
 
     large_body = "x" * (MAX_CHARS + 1000)
     with patch_httpx(fake_response(status_code, large_body)):
-        result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"}))
+        result = await http_tool.run({"url": "http://localhost:9090/metrics"})
 
     assert result.success is True
     assert result.truncated is True
diff --git a/ddev/tests/ai/tools/shell/test_base.py b/ddev/tests/ai/tools/shell/test_base.py
index 5d7431239a5e7..3568170b9092d 100644
--- a/ddev/tests/ai/tools/shell/test_base.py
+++ b/ddev/tests/ai/tools/shell/test_base.py
@@ -79,42 +79,42 @@ def slow_greet_tool() -> SlowGreetTool:
 # ---------------------------------------------------------------------------
 
 
-def test_run_command_success(proc):
+async def test_run_command_success(proc):
     with patch_proc(proc):
-        result = asyncio.run(run_command(["echo", "hello"]))
+        result = await run_command(["echo", "hello"])
     assert result.success is True
     assert result.data == "hello\n"
     assert result.truncated is False
 
 
-def test_run_command_failure_combines_stdout_and_stderr():
+async def test_run_command_failure_combines_stdout_and_stderr():
     proc = make_proc(returncode=1, stdout=b"partial\n", stderr=b"error\n")
     with patch_proc(proc):
-        result = asyncio.run(run_command(["cmd"]))
+        result = await run_command(["cmd"])
     assert result.success is False
     assert "partial" in result.data
     assert "error" in result.data
 
 
-def test_run_command_failure_stderr_only_when_no_stdout():
+async def test_run_command_failure_stderr_only_when_no_stdout():
     proc = make_proc(returncode=1, stdout=b"", stderr=b"fatal error\n")
     with patch_proc(proc):
-        result = asyncio.run(run_command(["cmd"]))
+        result = await run_command(["cmd"])
     assert result.success is False and result.data == "fatal error\n"
 
 
-def test_run_command_ignores_stderr_on_zero_exit():
+async def test_run_command_ignores_stderr_on_zero_exit():
     proc = make_proc(returncode=0, stdout=b"out\n", stderr=b"warning\n")
     with patch_proc(proc):
-        result = asyncio.run(run_command(["cmd"]))
+        result = await run_command(["cmd"])
     assert result.success is True
     assert "warning" not in result.data
 
 
-def test_run_command_stderr_included_when_stdout_empty_on_success():
+async def test_run_command_stderr_included_when_stdout_empty_on_success():
     proc = make_proc(returncode=0, stdout=b"", stderr=b"info: initialized\n")
     with patch_proc(proc):
-        result = asyncio.run(run_command(["cmd"]))
+        result = await run_command(["cmd"])
     assert result.success is True
     assert result.data == "info: initialized\n"
 
@@ -127,10 +127,10 @@ def test_run_command_stderr_included_when_stdout_empty_on_success():
         (1, b"", b""),
     ],
 )
-def test_run_command_empty_output(returncode, stdout, stderr):
+async def test_run_command_empty_output(returncode, stdout, stderr):
     proc = make_proc(returncode=returncode, stdout=stdout, stderr=stderr)
     with patch_proc(proc):
-        result = asyncio.run(run_command(["cmd"]))
+        result = await run_command(["cmd"])
     assert result.data == "(no output)"
 
 
@@ -139,27 +139,27 @@ def test_run_command_empty_output(returncode, stdout, stderr):
 # ---------------------------------------------------------------------------
 
 
-def test_run_command_not_found():
+async def test_run_command_not_found():
     with patch("asyncio.create_subprocess_exec", side_effect=FileNotFoundError()):
-        result = asyncio.run(run_command(["nonexistent"]))
+        result = await run_command(["nonexistent"])
     assert result.success is False
     assert "Command not found" in result.error
     assert "nonexistent" in result.error
 
 
-def test_run_command_timeout():
+async def test_run_command_timeout():
     proc = make_proc()
     with patch_proc(proc):
         with patch("asyncio.wait_for", new=_raise_timeout):
-            result = asyncio.run(run_command(["sleep", "100"], timeout=5))
+            result = await run_command(["sleep", "100"], timeout=5)
     assert result.success is False
     assert "5s" in result.error
     proc.kill.assert_called_once()
 
 
-def test_run_command_unexpected_exception():
+async def test_run_command_unexpected_exception():
     with patch("asyncio.create_subprocess_exec", side_effect=OSError("permission denied")):
-        result = asyncio.run(run_command(["cmd"]))
+        result = await run_command(["cmd"])
     assert result.success is False
     assert "OSError" in result.error
     assert "permission denied" in result.error
@@ -170,21 +170,21 @@ def test_run_command_unexpected_exception():
 # ---------------------------------------------------------------------------
 
 
-def test_run_command_truncation():
+async def test_run_command_truncation():
     large = ("x" * 80 + "\n") * 700
     proc = make_proc(stdout=large.encode())
     with patch_proc(proc):
-        result = asyncio.run(run_command(["cmd"]))
+        result = await run_command(["cmd"])
     assert result.truncated is True
     assert result.total_size == len(large)
     assert result.shown_size == len(result.data)
     assert result.hint is not None
 
 
-def test_run_command_no_truncation_at_limit():
+async def test_run_command_no_truncation_at_limit():
     proc = make_proc(stdout=("x" * MAX_CHARS).encode())
     with patch_proc(proc):
-        result = asyncio.run(run_command(["cmd"]))
+        result = await run_command(["cmd"])
     assert result.truncated is False
     assert result.total_size is None
     assert result.hint is None
@@ -200,10 +200,10 @@ def test_cmd_tool_timeouts(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool
     assert SlowGreetTool.timeout == 60  # custom timeout
 
 
-def test_cmd_tool_dispatches_with_correct_timeout(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool):
+async def test_cmd_tool_dispatches_with_correct_timeout(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool):
     for tool, expected_timeout in [(greet_tool, 10), (slow_greet_tool, 60)]:
         with patch(
             "ddev.ai.tools.shell.base.run_command", new=AsyncMock(return_value=ToolResult(success=True))
         ) as mock_run:
-            asyncio.run(tool.run({"name": "world"}))
+            await tool.run({"name": "world"})
         mock_run.assert_called_once_with(["echo", "hello world"], timeout=expected_timeout)
diff --git a/ddev/tests/ai/tools/shell/test_tools.py b/ddev/tests/ai/tools/shell/test_tools.py
index 81fcb45d3d3b1..05084acc97e9e 100644
--- a/ddev/tests/ai/tools/shell/test_tools.py
+++ b/ddev/tests/ai/tools/shell/test_tools.py
@@ -1,7 +1,6 @@
 # (C) Datadog, Inc. 2026-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
-import asyncio
 from unittest.mock import AsyncMock, patch
 
 import pytest
@@ -66,12 +65,12 @@ def test_grep_cmd_pattern_and_path_placement(grep_tool: GrepTool):
     assert cmd[-1] == "/my dir/sub dir"
 
 
-def test_grep_no_matches_returns_success(grep_tool: GrepTool):
+async def test_grep_no_matches_returns_success(grep_tool: GrepTool):
     from ddev.ai.tools.core.types import ToolResult
 
     no_match_result = ToolResult(success=False, data="(no output)", error=None)
     with patch("ddev.ai.tools.shell.grep.run_command", new=AsyncMock(return_value=no_match_result)):
-        result = asyncio.run(grep_tool(GrepInput(pattern="nomatch", path="/tmp")))
+        result = await grep_tool(GrepInput(pattern="nomatch", path="/tmp"))
     assert result.success is True
     assert result.data == "(no output)"