diff --git a/ddev/hatch.toml b/ddev/hatch.toml index 8a07ebbf5c149..a5fa11668e95f 100644 --- a/ddev/hatch.toml +++ b/ddev/hatch.toml @@ -9,6 +9,7 @@ python = "3.13" e2e-env = false dependencies = [ "pyyaml", + "pytest-asyncio", "vcrpy", ] # TODO: remove this when the old CLI is gone diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml index 8cfbc07271cf7..33af6ea6cbbeb 100644 --- a/ddev/pyproject.toml +++ b/ddev/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "anthropic>=0.18.0", + "anthropic>=0.86.0", "click~=8.1.6", "coverage", "datadog-api-client==2.20.0", @@ -136,3 +136,6 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] #Tests can use assertions and relative imports "**/tests/**/*" = ["I252"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" diff --git a/ddev/src/ddev/ai/agent/__init__.py b/ddev/src/ddev/ai/agent/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/src/ddev/ai/agent/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/src/ddev/ai/agent/client.py b/ddev/src/ddev/ai/agent/client.py new file mode 100644 index 0000000000000..d576429015cef --- /dev/null +++ b/ddev/src/ddev/ai/agent/client.py @@ -0,0 +1,219 @@ +# (C) Datadog, Inc. 
2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from copy import deepcopy +from dataclasses import dataclass +from enum import StrEnum +from typing import Any, Final + +import anthropic +from anthropic.types import MessageParam, ToolParam, ToolResultBlockParam + +from ddev.ai.tools.core.registry import ToolRegistry + +from .exceptions import ( + AgentAPIError, + AgentConnectionError, + AgentError, + AgentRateLimitError, +) + +DEFAULT_MODEL: Final[str] = "claude-sonnet-4-6" +DEFAULT_MAX_TOKENS: Final[int] = 8192 # max tokens per response +ALLOWED_TOOL_CALLERS: Final = ["code_execution_20260120"] + + +class StopReason(StrEnum): + """Maps Anthropic API stop_reason strings to a typed enum.""" + + END_TURN = "end_turn" + MAX_TOKENS = "max_tokens" + STOP_SEQUENCE = "stop_sequence" + TOOL_USE = "tool_use" + PAUSE_TURN = "pause_turn" + REFUSAL = "refusal" + + +@dataclass(frozen=True) +class ToolCall: + """A single tool invocation requested by the model.""" + + id: str + name: str + input: dict[str, Any] + + +@dataclass(frozen=True) +class ContextUsage: + """Context window accounting for a single API call.""" + + window_size: int + used_tokens: int + + @property + def context_pct(self) -> float: + return self.used_tokens / self.window_size * 100 + + @property + def remaining_tokens(self) -> int: + return self.window_size - self.used_tokens + + +@dataclass(frozen=True) +class TokenUsage: + """Token accounting from a single API call.""" + + input_tokens: int # tokens sent to the model (system_prompt + history) + output_tokens: int # tokens the model generated + cache_read_input_tokens: int # tokens read from prompt cache + cache_creation_input_tokens: int # tokens written to prompt cache + context: ContextUsage + + +@dataclass(frozen=True) +class AgentResponse: + """The complete response from a single AnthropicAgent.send() call. 
class AnthropicAgent:
    """A wrapper around the Anthropic API that provides a simple interface for interacting with agents.

    Conversation history is kept internally and is only extended after a
    successful API call, so a failed send() leaves the conversation intact.
    """

    def __init__(
        self,
        client: anthropic.AsyncAnthropic,
        tools: ToolRegistry,
        system_prompt: str,
        name: str,
        model: str = DEFAULT_MODEL,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        programmatic_tool_calling: bool = False,
    ) -> None:
        """Initialize an AnthropicAgent.
        Args:
            client: The Anthropic client to use.
            tools: The ToolRegistry to use (might not be used in every call if allowed_tools in send() is provided)
            system_prompt: The system prompt to use.
            name: The name of the agent.
            model: The model to use.
            max_tokens: The max tokens per response.
            programmatic_tool_calling: Whether to allow programmatic tool calling.
        """

        self._client = client
        self._tools = tools
        self._system_prompt = system_prompt
        self.name = name
        self._model = model
        self._max_tokens = max_tokens
        self._programmatic_tool_calling = programmatic_tool_calling
        # Alternating user/assistant MessageParam entries; appended in pairs by send().
        self._history: list[MessageParam] = []
        # Lazily fetched from the models API and cached for the agent's lifetime.
        self._context_window: int | None = None

    @property
    def history(self) -> list[MessageParam]:
        """Read-only snapshot of the conversation history."""
        # Deep copy so callers cannot mutate internal state through the snapshot.
        return deepcopy(self._history)

    def reset(self) -> None:
        """Clear conversation history to start a new conversation."""
        self._history = []

    async def _get_context_window(self) -> int:
        # Fetch the model's max input tokens once; reuse the cached value afterwards.
        if self._context_window is None:
            info = await self._client.models.retrieve(self._model)
            self._context_window = info.max_input_tokens
        return self._context_window

    def _get_tool_definitions(self, allowed_tools: list[str] | None) -> list[ToolParam]:
        """Filter tool definitions by allowlist. None means all tools."""
        definitions = self._tools.definitions
        if allowed_tools is not None:
            allowed = set(allowed_tools)
            definitions = [d for d in definitions if d["name"] in allowed]
        # NOTE(review): allowed_callers is attached only when programmatic tool
        # calling is DISABLED — confirm this polarity is intended; the tests do
        # not pin it.
        if not self._programmatic_tool_calling:
            definitions = [{**d, "allowed_callers": ALLOWED_TOOL_CALLERS} for d in definitions]
        return definitions

    async def send(
        self,
        content: str | list[ToolResultBlockParam],
        allowed_tools: list[str] | None = None,
    ) -> AgentResponse:
        """Send a message to the agent and return the response.
        Args:
            content: The content to send to the agent.
            allowed_tools: The tools in the ToolRegistry to allow the agent to use.
        Returns:
            An AgentResponse object containing the response from the agent.
        Raises:
            AgentConnectionError: The API was unreachable.
            AgentRateLimitError: The request was rate limited.
            AgentAPIError: The API returned an error status code.
            AgentError: Response validation failed or the stop_reason was
                missing/unknown.
        """
        tool_defs = self._get_tool_definitions(allowed_tools)

        user_msg: MessageParam = {"role": "user", "content": content}
        # Build the outgoing message list without touching self._history yet;
        # history is only committed after a successful response (see below).
        messages = [*self._history, user_msg]

        try:
            response = await self._client.messages.create(
                model=self._model,
                max_tokens=self._max_tokens,
                system=self._system_prompt,
                messages=messages,
                # Omit the tools field entirely when nothing survives filtering.
                tools=tool_defs if tool_defs else anthropic.NOT_GIVEN,
            )
        except anthropic.APIConnectionError as e:
            raise AgentConnectionError(f"Connection failed: {e}") from e
        except anthropic.RateLimitError as e:
            raise AgentRateLimitError(f"Rate limit exceeded: {e}") from e
        except anthropic.APIStatusError as e:
            raise AgentAPIError(e.status_code, e.message) from e
        except anthropic.APIResponseValidationError as e:
            raise AgentError(f"Response validation failed: {e}") from e

        # stop_reason is None only in streaming responses; we use non-streaming, so None is unexpected
        if response.stop_reason is None:
            raise AgentError("Received null stop_reason from API")

        try:
            stop_reason = StopReason(response.stop_reason)
        except ValueError as e:
            raise AgentError(f"Unknown stop_reason: {response.stop_reason!r}") from e

        text_parts: list[str] = []
        tool_calls: list[ToolCall] = []

        for block in response.content:
            if isinstance(block, anthropic.types.TextBlock):
                text_parts.append(block.text)
            elif isinstance(block, anthropic.types.ToolUseBlock):
                tool_calls.append(ToolCall(id=block.id, name=block.name, input=dict(block.input)))
            # ThinkingBlock and RedactedThinkingBlock are intentionally ignored.
            # Extended thinking support can add a `thinking: str` field to AgentResponse later.

        # Cache counters may come back as None; normalize to 0 for arithmetic.
        cache_read = response.usage.cache_read_input_tokens or 0
        cache_creation = response.usage.cache_creation_input_tokens or 0
        # Context consumption = uncached input + cache reads + cache writes.
        used_tokens = response.usage.input_tokens + cache_read + cache_creation
        usage = TokenUsage(
            input_tokens=response.usage.input_tokens,
            output_tokens=response.usage.output_tokens,
            cache_read_input_tokens=cache_read,
            cache_creation_input_tokens=cache_creation,
            context=ContextUsage(window_size=await self._get_context_window(), used_tokens=used_tokens),
        )

        agent_response = AgentResponse(
            stop_reason=stop_reason,
            text="\n".join(text_parts),
            tool_calls=tool_calls,
            usage=usage,
        )

        # Save to history only after a successful response.
        self._history.extend([user_msg, {"role": "assistant", "content": response.content}])

        return agent_response
class AgentError(Exception):
    """Base exception for every failure raised by AnthropicAgent."""


class AgentConnectionError(AgentError):
    """The Anthropic API could not be reached over the network."""


class AgentRateLimitError(AgentError):
    """The request was rate limited; it may be retried after a delay."""


class AgentAPIError(AgentError):
    """The API answered with an error status code.

    Attributes:
        status_code: HTTP status code returned by the API.
    """

    def __init__(self, status_code: int, message: str) -> None:
        super().__init__(message)
        self.status_code = status_code
- Each tool definition dict is not mutated, but a new dict is returned with the allowed_callers key added.""" - return [{**tool.definition, "allowed_callers": ALLOWED_TOOL_CALLERS} for tool in self._tools.values()] + """Return Anthropic SDK tool definitions for all registered tools.""" + return [tool.definition for tool in self._tools.values()] async def run(self, name: str, raw: dict[str, object]) -> ToolResult: """Execute a tool by name, returning an error result if not found.""" diff --git a/ddev/tests/ai/agent/__init__.py b/ddev/tests/ai/agent/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/tests/ai/agent/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/ai/agent/test_client.py b/ddev/tests/ai/agent/test_client.py new file mode 100644 index 0000000000000..f4d1b9f5e8c96 --- /dev/null +++ b/ddev/tests/ai/agent/test_client.py @@ -0,0 +1,453 @@ +# (C) Datadog, Inc. 
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def make_usage(
    input_tokens: int = 10,
    output_tokens: int = 20,
    cache_read: int | None = None,
    cache_creation: int | None = None,
) -> SimpleNamespace:
    """Fake of the Anthropic ``usage`` payload with optional cache counters."""
    fields = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cache_read_input_tokens": cache_read,
        "cache_creation_input_tokens": cache_creation,
    }
    return SimpleNamespace(**fields)


def make_text_block(text: str) -> anthropic.types.TextBlock:
    """Real SDK TextBlock carrying *text*."""
    return anthropic.types.TextBlock(type="text", text=text)


def make_tool_use_block(
    id: str = "toolu_01",
    name: str = "read_file",
    input: dict | None = None,
) -> anthropic.types.ToolUseBlock:
    """Real SDK ToolUseBlock; defaults to a read_file call on /tmp/file.txt."""
    effective_input = input or {"path": "/tmp/file.txt"}
    return anthropic.types.ToolUseBlock(
        type="tool_use",
        id=id,
        name=name,
        input=effective_input,
    )


def make_response(
    stop_reason: str | None,
    content: list,
    usage: SimpleNamespace | None = None,
) -> SimpleNamespace:
    """Fake non-streaming Messages API response; usage defaults via make_usage()."""
    return SimpleNamespace(
        stop_reason=stop_reason,
        content=content,
        usage=usage or make_usage(),
    )


FAKE_CONTEXT_WINDOW = 200_000
make_response("end_turn", [])) + client.models = MagicMock() + client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) + registry = tools or ToolRegistry([]) + agent = AnthropicAgent( + client=client, + tools=registry, + system_prompt="You are helpful.", + name="test-agent", + ) + return agent, client.messages.create + + +# --------------------------------------------------------------------------- +# end_turn with a single TextBlock +# --------------------------------------------------------------------------- + + +async def test_end_turn_single_text_block() -> None: + content = [make_text_block("Hello!")] + resp = make_response("end_turn", content) + agent, _ = make_agent(mock_response=resp) + + result = await agent.send("Hi") + + assert result.stop_reason is StopReason.END_TURN + assert result.text == "Hello!" + assert result.tool_calls == [] + assert len(agent.history) == 2 + assert agent.history[0] == {"role": "user", "content": "Hi"} + assert agent.history[1] == {"role": "assistant", "content": content} + + +# --------------------------------------------------------------------------- +# tool_use +# --------------------------------------------------------------------------- + + +async def test_tool_use_single_block() -> None: + block = make_tool_use_block(id="toolu_42", name="read_file", input={"path": "/etc/hosts"}) + resp = make_response("tool_use", [block]) + agent, _ = make_agent(mock_response=resp) + + result = await agent.send("Read hosts") + + assert result.stop_reason is StopReason.TOOL_USE + assert len(result.tool_calls) == 1 + tc = result.tool_calls[0] + assert tc.id == "toolu_42" + assert tc.name == "read_file" + assert tc.input == {"path": "/etc/hosts"} + + +# --------------------------------------------------------------------------- +# mixed TextBlock + ToolUseBlock +# --------------------------------------------------------------------------- + + +async def test_mixed_text_and_tool_use() -> None: + 
# ---------------------------------------------------------------------------
# Multiple TextBlocks are concatenated
# ---------------------------------------------------------------------------


async def test_multiple_text_blocks_are_concatenated() -> None:
    # AgentResponse.text joins consecutive text blocks with a newline.
    content = [make_text_block("Hello, "), make_text_block("world!")]
    resp = make_response("end_turn", content)
    agent, _ = make_agent(mock_response=resp)

    result = await agent.send("Hi")

    assert result.text == "Hello, \nworld!"


# ---------------------------------------------------------------------------
# max_tokens is a normal response (not an error)
# ---------------------------------------------------------------------------


async def test_max_tokens_is_not_an_error() -> None:
    resp = make_response("max_tokens", [make_text_block("Truncated...")])
    agent, _ = make_agent(mock_response=resp)

    result = await agent.send("Tell me everything")

    assert result.stop_reason is StopReason.MAX_TOKENS
    # A truncated turn still counts as success, so history gains the usual pair.
    assert len(agent.history) == 2


# ---------------------------------------------------------------------------
# allowed_tools filtering
# ---------------------------------------------------------------------------


class FakeTool:
    """Minimal stand-in matching the ToolProtocol surface for registry tests."""

    def __init__(self, name: str) -> None:
        self._name = name

    @property
    def name(self) -> str:
        return self._name

    @property
    def description(self) -> str:
        return ""

    @property
    def definition(self) -> dict:
        # Shape mirrors an Anthropic ToolParam; only `name` matters to these tests.
        return {"name": self._name, "description": "", "input_schema": {}}

    async def run(self, raw: dict) -> ToolResult:
        # Stub: never invoked by the allowed_tools tests. Returns None despite
        # the ToolResult annotation — fine for a stub, but do not call it.
        pass
None: + registry = ToolRegistry([FakeTool(n) for n in ["read_file", "grep", "mkdir"]]) + resp = make_response("end_turn", [make_text_block("ok")]) + agent, create_mock = make_agent(tools=registry, mock_response=resp) + + await agent.send("Hi", allowed_tools=["read_file"]) + + sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]] + assert sent_names == ["read_file"] + + +async def test_allowed_tools_none_passes_all() -> None: + registry = ToolRegistry([FakeTool(n) for n in ["a", "b"]]) + resp = make_response("end_turn", [make_text_block("ok")]) + agent, create_mock = make_agent(tools=registry, mock_response=resp) + + await agent.send("Hi", allowed_tools=None) + + sent_names = [t["name"] for t in create_mock.call_args.kwargs["tools"]] + assert sent_names == ["a", "b"] + + +@pytest.mark.parametrize("allowed_tools", [[], ["nonexistent_tool"]]) +async def test_allowed_tools_passes_not_given(allowed_tools: list[str]) -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, create_mock = make_agent(mock_response=resp) + + await agent.send("Hi", allowed_tools=allowed_tools) + + assert create_mock.call_args.kwargs["tools"] is anthropic.NOT_GIVEN + + +# --------------------------------------------------------------------------- +# API errors map to the correct AgentError subclass +# --------------------------------------------------------------------------- + + +def _make_error_agent(side_effect: Exception) -> AnthropicAgent: + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock(side_effect=side_effect) + return AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + +async def test_connection_error_maps_to_agent_connection_error() -> None: + agent = _make_error_agent(anthropic.APIConnectionError(request=MagicMock())) + + with pytest.raises(AgentConnectionError) as exc_info: + await agent.send("Hi") + + assert "Connection failed" in 
str(exc_info.value) + assert agent.history == [] + + +async def test_rate_limit_error_maps_to_agent_rate_limit_error() -> None: + agent = _make_error_agent( + anthropic.RateLimitError( + message="rate limit", + response=MagicMock(status_code=429, headers={}), + body=None, + ) + ) + + with pytest.raises(AgentRateLimitError) as exc_info: + await agent.send("Hi") + + assert "Rate limit exceeded" in str(exc_info.value) + assert agent.history == [] + + +async def test_api_status_error_maps_to_agent_api_error() -> None: + agent = _make_error_agent( + anthropic.APIStatusError( + message="internal server error", + response=MagicMock(status_code=500), + body=None, + ) + ) + + with pytest.raises(AgentAPIError) as exc_info: + await agent.send("Hi") + + assert exc_info.value.status_code == 500 + assert agent.history == [] + + +async def test_response_validation_error_maps_to_agent_error() -> None: + agent = _make_error_agent(anthropic.APIResponseValidationError(response=MagicMock(), body=None)) + + with pytest.raises(AgentError) as exc_info: + await agent.send("Hi") + + assert "Response validation failed" in str(exc_info.value) + assert agent.history == [] + + +# --------------------------------------------------------------------------- +# Unknown stop_reason raises AgentError, history unchanged +# --------------------------------------------------------------------------- + + +async def test_unknown_stop_reason_raises_agent_error() -> None: + resp = make_response("totally_unknown_reason", []) + agent, _ = make_agent(mock_response=resp) + + with pytest.raises(AgentError) as exc_info: + await agent.send("Hi") + + assert agent.history == [] + assert "Unknown stop_reason" in str(exc_info.value) + assert "totally_unknown_reason" in str(exc_info.value) + + +# --------------------------------------------------------------------------- +# cache_read_input_tokens=None defaults to 0 +# --------------------------------------------------------------------------- + + +async def 
# ---------------------------------------------------------------------------
# ContextUsage fields
# ---------------------------------------------------------------------------


async def test_context_usage_fields() -> None:
    """Context accounting sums uncached input, cache reads and cache writes."""
    usage = make_usage(input_tokens=1000, cache_read=500, cache_creation=200)
    resp = make_response("end_turn", [make_text_block("ok")], usage=usage)
    agent, _ = make_agent(mock_response=resp)

    result = await agent.send("Hi")

    ctx = result.usage.context
    assert ctx.window_size == FAKE_CONTEXT_WINDOW
    assert ctx.used_tokens == 1700  # 1000 + 500 + 200
    assert ctx.context_pct == pytest.approx(1700 / FAKE_CONTEXT_WINDOW * 100)
    assert ctx.remaining_tokens == FAKE_CONTEXT_WINDOW - 1700


# ---------------------------------------------------------------------------
# context_window is fetched once and cached across multiple sends
# ---------------------------------------------------------------------------


async def test_context_window_fetched_once() -> None:
    """models.retrieve is awaited once; the window is cached for later sends."""
    resp = make_response("end_turn", [make_text_block("ok")])
    # make_agent already wires messages.create to return `resp`, so no extra
    # mock re-assignment is needed here.
    agent, _ = make_agent(mock_response=resp)

    await agent.send("First")
    await agent.send("Second")

    # Reaching into the private client mock is acceptable in tests to
    # observe the caching behavior.
    agent._client.models.retrieve.assert_awaited_once()
make_response("tool_use", [make_tool_use_block(id="toolu_01")]) + text_resp = make_response("end_turn", [make_text_block("Done.")]) + + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock(side_effect=[tool_resp, text_resp]) + client.models = MagicMock() + client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) + agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + first = await agent.send("Do X") + assert first.stop_reason is StopReason.TOOL_USE + assert len(agent.history) == 2 + + tool_results = [{"type": "tool_result", "tool_use_id": "toolu_01", "content": "result"}] + second = await agent.send(tool_results) + assert second.stop_reason is StopReason.END_TURN + assert len(agent.history) == 4 + assert agent.history[2]["role"] == "user" + assert agent.history[3]["role"] == "assistant" + + +# --------------------------------------------------------------------------- +# history property returns a copy +# --------------------------------------------------------------------------- + + +async def test_history_property_returns_copy() -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, _ = make_agent(mock_response=resp) + await agent.send("Hi") + + snapshot = agent.history + snapshot.clear() + + assert len(agent.history) == 2 + + +# --------------------------------------------------------------------------- +# reset() clears history +# --------------------------------------------------------------------------- + + +async def test_reset_clears_history() -> None: + resp = make_response("end_turn", [make_text_block("ok")]) + agent, _ = make_agent(mock_response=resp) + await agent.send("Hi") + assert len(agent.history) == 2 + + agent.reset() + assert agent.history == [] + + +# --------------------------------------------------------------------------- +# Error mid-conversation leaves history 
unchanged +# --------------------------------------------------------------------------- + + +async def test_error_mid_conversation_leaves_history_unchanged() -> None: + ok_resp = make_response("end_turn", [make_text_block("ok")]) + client = MagicMock(spec=anthropic.AsyncAnthropic) + client.messages = MagicMock() + client.messages.create = AsyncMock( + side_effect=[ + ok_resp, + anthropic.APIConnectionError(request=MagicMock()), + ] + ) + client.models = MagicMock() + client.models.retrieve = AsyncMock(return_value=SimpleNamespace(max_input_tokens=FAKE_CONTEXT_WINDOW)) + agent = AnthropicAgent(client=client, tools=ToolRegistry([]), system_prompt="", name="t") + + await agent.send("First message") + history_after_first = agent.history[:] + + with pytest.raises(AgentConnectionError): + await agent.send("Second message") + + assert agent.history == history_after_first diff --git a/ddev/tests/ai/tools/core/test_base.py b/ddev/tests/ai/tools/core/test_base.py index 96cd0f8b07d0c..35e94f750a69e 100644 --- a/ddev/tests/ai/tools/core/test_base.py +++ b/ddev/tests/ai/tools/core/test_base.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from typing import Annotated import pytest @@ -194,8 +193,8 @@ async def __call__(self, tool_input: SimpleInput) -> ToolResult: # --- run(): happy path --- -def test_run_valid_input_returns_success(echo_tool: EchoTool): - result = asyncio.run(echo_tool.run({"message": "hello"})) +async def test_run_valid_input_returns_success(echo_tool: EchoTool): + result = await echo_tool.run({"message": "hello"}) assert result.success is True assert result.data == "hello" @@ -210,8 +209,8 @@ def test_run_valid_input_returns_success(echo_tool: EchoTool): {"message": "hi", "extra": "oops"}, ], ) -def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict): - result = asyncio.run(echo_tool.run(raw)) +async def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict): + result = await echo_tool.run(raw) assert result.success is False assert result.error is not None @@ -219,8 +218,8 @@ def test_run_invalid_input_returns_failure(echo_tool: EchoTool, raw: dict): # --- run(): __call__ exception handling --- -def test_run_captures_exception_from_call(failing_tool: FailingTool): - result = asyncio.run(failing_tool.run({"message": "boom"})) +async def test_run_captures_exception_from_call(failing_tool: FailingTool): + result = await failing_tool.run({"message": "boom"}) assert isinstance(result, ToolResult) assert result.success is False assert "RuntimeError" in result.error diff --git a/ddev/tests/ai/tools/core/test_registry.py b/ddev/tests/ai/tools/core/test_registry.py index fdd42714b6ed4..1366a9d8b5be8 100644 --- a/ddev/tests/ai/tools/core/test_registry.py +++ b/ddev/tests/ai/tools/core/test_registry.py @@ -1,11 +1,10 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio import pytest -from ddev.ai.tools.core.registry import ALLOWED_TOOL_CALLERS, ToolRegistry +from ddev.ai.tools.core.registry import ToolRegistry from ddev.ai.tools.core.types import ToolResult # --------------------------------------------------------------------------- @@ -76,8 +75,6 @@ def test_empty_registry_returns_empty_list(): def test_tool_registry_definitions_returns_all_tool_definitions(): registry = ToolRegistry([FakeTool("a"), FakeTool("b")]) assert len(registry.definitions) == 2 - for defn in registry.definitions: - assert defn["allowed_callers"] == ALLOWED_TOOL_CALLERS def test_definition_contains_tool_name(): @@ -90,41 +87,41 @@ def test_definition_contains_tool_name(): # --------------------------------------------------------------------------- -def test_run_dispatches_to_correct_tool(): +async def test_run_dispatches_to_correct_tool(): tool_a = FakeTool("a", ToolResult(success=True, data="from a")) tool_b = FakeTool("b", ToolResult(success=True, data="from b")) registry = ToolRegistry([tool_a, tool_b]) - result = asyncio.run(registry.run("b", {})) + result = await registry.run("b", {}) assert result.success is True assert result.data == "from b" -def test_passes_raw_dict_to_tool_unchanged(): +async def test_passes_raw_dict_to_tool_unchanged(): tool = FakeTool("t") registry = ToolRegistry([tool]) raw = {"key": "value", "num": 42} - asyncio.run(registry.run("t", raw)) + await registry.run("t", raw) assert tool.last_raw == raw -def test_returns_tool_result_on_tool_failure(): +async def test_returns_tool_result_on_tool_failure(): registry = ToolRegistry([FakeTool("t", ToolResult(success=False, error="bad input"))]) - result = asyncio.run(registry.run("t", {})) + result = await registry.run("t", {}) assert result.success is False assert result.error == "bad input" -def test_unknown_tool_returns_failure(): +async def 
test_unknown_tool_returns_failure(): registry = ToolRegistry([FakeTool("known_tool")]) - result = asyncio.run(registry.run("unknown_tool", {})) + result = await registry.run("unknown_tool", {}) assert result.success is False assert "Unknown tool: 'unknown_tool'" in result.error -def test_empty_registry_always_returns_unknown_error(): +async def test_empty_registry_always_returns_unknown_error(): registry = ToolRegistry([]) - result = asyncio.run(registry.run("anything", {})) + result = await registry.run("anything", {}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/conftest.py b/ddev/tests/ai/tools/fs/conftest.py index 8d6677b98c398..12ae9e34eb1d5 100644 --- a/ddev/tests/ai/tools/fs/conftest.py +++ b/ddev/tests/ai/tools/fs/conftest.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio import pytest @@ -38,8 +37,8 @@ def append_tool(registry: FileRegistry) -> AppendFileTool: @pytest.fixture -def known_file(tmp_path, create_tool: CreateFileTool): +async def known_file(tmp_path, create_tool: CreateFileTool): """A temp file registered in the registry via create.""" f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "line one\nline two\nline three\n"})) + await create_tool.run({"path": str(f), "content": "line one\nline two\nline three\n"}) return f diff --git a/ddev/tests/ai/tools/fs/test_append_file.py b/ddev/tests/ai/tools/fs/test_append_file.py index 2b669572d30bb..289142e378191 100644 --- a/ddev/tests/ai/tools/fs/test_append_file.py +++ b/ddev/tests/ai/tools/fs/test_append_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch import pytest @@ -23,8 +22,10 @@ def test_tool_name(registry: FileRegistry) -> None: ("A\r\nB\r\n", "A\nB\n", "\r"), ], ) -def test_append_file_success(append_tool: AppendFileTool, known_file, content, expected_in, expected_not_in) -> None: - result = asyncio.run(append_tool.run({"path": str(known_file), "content": content})) +async def test_append_file_success( + append_tool: AppendFileTool, known_file, content, expected_in, expected_not_in +) -> None: + result = await append_tool.run({"path": str(known_file), "content": content}) assert result.success is True text = known_file.read_text(encoding="utf-8") @@ -33,11 +34,11 @@ def test_append_file_success(append_tool: AppendFileTool, known_file, content, e assert expected_not_in not in text -def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tmp_path) -> None: +async def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tmp_path) -> None: f = tmp_path / "unread.txt" f.write_text("content", encoding="utf-8") - result = asyncio.run(append_tool.run({"path": str(f), "content": "more"})) + result = await append_tool.run({"path": str(f), "content": "more"}) assert result.success is False assert "Not authorized" in result.error @@ -50,39 +51,39 @@ def test_append_file_fails_for_unregistered_file(append_tool: AppendFileTool, tm ("", "first line", "first line"), ], ) -def test_append_file_separator( +async def test_append_file_separator( append_tool: AppendFileTool, create_tool: CreateFileTool, tmp_path, initial, appended, expected ) -> None: f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": initial})) + await create_tool.run({"path": str(f), "content": initial}) - result = asyncio.run(append_tool.run({"path": str(f), "content": appended})) + result = await append_tool.run({"path": str(f), "content": 
appended}) assert result.success is True assert f.read_text(encoding="utf-8") == expected -def test_append_file_fails_if_file_changed_externally(append_tool: AppendFileTool, known_file) -> None: +async def test_append_file_fails_if_file_changed_externally(append_tool: AppendFileTool, known_file) -> None: known_file.write_text("externally modified\n", encoding="utf-8") - result = asyncio.run(append_tool.run({"path": str(known_file), "content": "more"})) + result = await append_tool.run({"path": str(known_file), "content": "more"}) assert result.success is False assert "Re-read and retry" in result.error -def test_append_file_updates_registry(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: - asyncio.run(append_tool.run({"path": str(known_file), "content": "extra\n"})) +async def test_append_file_updates_registry(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: + await append_tool.run({"path": str(known_file), "content": "extra\n"}) new_content = known_file.read_text(encoding="utf-8") assert registry.verify(str(known_file), new_content) is True -def test_append_file_oserror_on_write(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: +async def test_append_file_oserror_on_write(append_tool: AppendFileTool, registry: FileRegistry, known_file) -> None: original_content = known_file.read_text(encoding="utf-8") with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(append_tool.run({"path": str(known_file), "content": "new line"})) + result = await append_tool.run({"path": str(known_file), "content": "new line"}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/test_create_file.py b/ddev/tests/ai/tools/fs/test_create_file.py index 2714ef5bb06aa..8b0c0296fa38a 100644 --- a/ddev/tests/ai/tools/fs/test_create_file.py +++ b/ddev/tests/ai/tools/fs/test_create_file.py @@ -1,7 +1,6 @@ # (C) Datadog, 
Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch from ddev.ai.tools.fs.create_file import CreateFileTool @@ -12,41 +11,41 @@ def test_tool_name(registry: FileRegistry) -> None: assert CreateFileTool(registry).name == "create_file" -def test_create_file_success(create_tool: CreateFileTool, tmp_path) -> None: +async def test_create_file_success(create_tool: CreateFileTool, tmp_path) -> None: f = tmp_path / "new.txt" - result = asyncio.run(create_tool.run({"path": str(f), "content": "hello"})) + result = await create_tool.run({"path": str(f), "content": "hello"}) assert result.success is True assert f.read_text(encoding="utf-8") == "hello" -def test_create_file_default_empty_content(create_tool: CreateFileTool, tmp_path) -> None: +async def test_create_file_default_empty_content(create_tool: CreateFileTool, tmp_path) -> None: f = tmp_path / "empty.txt" - result = asyncio.run(create_tool.run({"path": str(f)})) + result = await create_tool.run({"path": str(f)}) assert result.success is True assert f.read_text(encoding="utf-8") == "" -def test_create_file_creates_missing_parent_dirs(create_tool: CreateFileTool, tmp_path) -> None: +async def test_create_file_creates_missing_parent_dirs(create_tool: CreateFileTool, tmp_path) -> None: f = tmp_path / "a" / "b" / "c" / "file.txt" - result = asyncio.run(create_tool.run({"path": str(f), "content": "nested"})) + result = await create_tool.run({"path": str(f), "content": "nested"}) assert result.success is True assert f.exists() assert f.read_text(encoding="utf-8") == "nested" -def test_create_file_fails_if_file_already_exists( +async def test_create_file_fails_if_file_already_exists( create_tool: CreateFileTool, registry: FileRegistry, tmp_path ) -> None: f = tmp_path / "existing.txt" f.write_text("original", encoding="utf-8") - result = asyncio.run(create_tool.run({"path": str(f), "content": "new"})) + result = await 
create_tool.run({"path": str(f), "content": "new"}) assert result.success is False assert result.error is not None @@ -54,19 +53,19 @@ def test_create_file_fails_if_file_already_exists( assert not registry.is_known(str(f)) -def test_create_tool_registers_in_registry(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_create_tool_registers_in_registry(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "hi"})) + await create_tool.run({"path": str(f), "content": "hi"}) assert registry.is_known(str(f)) is True assert registry.verify(str(f), "hi") is True -def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "a" / "b" / "new.txt" with patch("pathlib.Path.mkdir", side_effect=PermissionError("permission denied")): - result = asyncio.run(create_tool.run({"path": str(f), "content": "hi"})) + result = await create_tool.run({"path": str(f), "content": "hi"}) assert result.success is False assert result.error is not None @@ -74,11 +73,11 @@ def test_create_file_oserror_on_mkdir(create_tool: CreateFileTool, registry: Fil assert not registry.is_known(str(f)) -def test_create_file_oserror_on_write(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_create_file_oserror_on_write(create_tool: CreateFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "new.txt" with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(create_tool.run({"path": str(f), "content": "hi"})) + result = await create_tool.run({"path": str(f), "content": "hi"}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/test_edit_file.py 
b/ddev/tests/ai/tools/fs/test_edit_file.py index cbfd48a78c193..27c8b87cedce2 100644 --- a/ddev/tests/ai/tools/fs/test_edit_file.py +++ b/ddev/tests/ai/tools/fs/test_edit_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch import pytest @@ -15,8 +14,8 @@ def test_tool_name(registry: FileRegistry) -> None: assert EditFileTool(registry).name == "edit_file" -def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None: - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line two", "new_string": "line TWO"})) +async def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None: + result = await edit_tool.run({"path": str(known_file), "old_string": "line two", "new_string": "line TWO"}) assert result.success is True content = known_file.read_text(encoding="utf-8") @@ -24,54 +23,56 @@ def test_edit_file_replaces_string(edit_tool: EditFileTool, known_file) -> None: assert "line two" not in content -def test_edit_file_deletes_line(edit_tool: EditFileTool, known_file) -> None: - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line two\n", "new_string": ""})) +async def test_edit_file_deletes_line(edit_tool: EditFileTool, known_file) -> None: + result = await edit_tool.run({"path": str(known_file), "old_string": "line two\n", "new_string": ""}) assert result.success is True assert "line two" not in known_file.read_text(encoding="utf-8") -def test_edit_file_fails_for_unregistered_file(edit_tool: EditFileTool, tmp_path) -> None: +async def test_edit_file_fails_for_unregistered_file(edit_tool: EditFileTool, tmp_path) -> None: f = tmp_path / "unread.txt" f.write_text("content", encoding="utf-8") - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "content", "new_string": "new"})) + result = await edit_tool.run({"path": str(f), "old_string": 
"content", "new_string": "new"}) assert result.success is False assert "Not authorized" in result.error @pytest.mark.parametrize("old_string", ["does not exist", ""]) -def test_edit_file_fails_if_old_string_not_found_or_empty(edit_tool: EditFileTool, known_file, old_string) -> None: - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": old_string, "new_string": "x"})) +async def test_edit_file_fails_if_old_string_not_found_or_empty( + edit_tool: EditFileTool, known_file, old_string +) -> None: + result = await edit_tool.run({"path": str(known_file), "old_string": old_string, "new_string": "x"}) assert result.success is False -def test_edit_file_fails_if_old_string_ambiguous( +async def test_edit_file_fails_if_old_string_ambiguous( edit_tool: EditFileTool, create_tool: CreateFileTool, tmp_path ) -> None: f = tmp_path / "dup.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "foo\nfoo\nfoo\n"})) + await create_tool.run({"path": str(f), "content": "foo\nfoo\nfoo\n"}) - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "foo", "new_string": "bar"})) + result = await edit_tool.run({"path": str(f), "old_string": "foo", "new_string": "bar"}) assert result.success is False assert "3" in result.error assert result.hint is not None -def test_edit_file_fails_if_file_changed_externally(edit_tool: EditFileTool, known_file) -> None: +async def test_edit_file_fails_if_file_changed_externally(edit_tool: EditFileTool, known_file) -> None: known_file.write_text("externally modified\n", encoding="utf-8") - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"})) + result = await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"}) assert result.success is False assert "Re-read and retry" in result.error -def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: - asyncio.run(edit_tool.run({"path": 
str(known_file), "old_string": "line one", "new_string": "LINE ONE"})) +async def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: + await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "LINE ONE"}) new_content = known_file.read_text(encoding="utf-8") assert registry.verify(str(known_file), new_content) is True @@ -85,23 +86,23 @@ def test_edit_file_updates_registry(edit_tool: EditFileTool, registry: FileRegis ("line one\n", "line one", "A\r\nB", "A\nB\n"), # CRLF in new_string ], ) -def test_edit_file_normalizes_crlf( +async def test_edit_file_normalizes_crlf( edit_tool: EditFileTool, create_tool: CreateFileTool, tmp_path, file_content, old_string, new_string, expected ) -> None: f = tmp_path / "file.txt" - asyncio.run(create_tool.run({"path": str(f), "content": file_content})) + await create_tool.run({"path": str(f), "content": file_content}) - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": old_string, "new_string": new_string})) + result = await edit_tool.run({"path": str(f), "old_string": old_string, "new_string": new_string}) assert result.success is True assert f.read_text(encoding="utf-8") == expected -def test_edit_file_oserror_on_write(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: +async def test_edit_file_oserror_on_write(edit_tool: EditFileTool, registry: FileRegistry, known_file) -> None: original_content = known_file.read_text(encoding="utf-8") with patch("pathlib.Path.write_text", side_effect=PermissionError("permission denied")): - result = asyncio.run(edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"})) + result = await edit_tool.run({"path": str(known_file), "old_string": "line one", "new_string": "x"}) assert result.success is False assert result.error is not None diff --git a/ddev/tests/ai/tools/fs/test_read_file.py b/ddev/tests/ai/tools/fs/test_read_file.py index 
f1b8da06d91ed..f2497e6c09a18 100644 --- a/ddev/tests/ai/tools/fs/test_read_file.py +++ b/ddev/tests/ai/tools/fs/test_read_file.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import patch import pytest @@ -14,47 +13,47 @@ def test_tool_name(registry: FileRegistry) -> None: assert ReadFileTool(registry).name == "read_file" -def test_read_file_success(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_success(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "config.txt" f.write_text("hello\nworld\n", encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is True assert result.data == "0: hello\n1: world\n" -def test_read_registers_unknown_file(read_tool: ReadFileTool, registry: FileRegistry, tmp_path) -> None: +async def test_read_registers_unknown_file(read_tool: ReadFileTool, registry: FileRegistry, tmp_path) -> None: f = tmp_path / "file.txt" f.write_text("content", encoding="utf-8") - asyncio.run(read_tool.run({"path": str(f)})) + await read_tool.run({"path": str(f)}) assert registry.is_known(str(f)) is True -def test_read_file_missing_file(read_tool: ReadFileTool, tmp_path) -> None: - result = asyncio.run(read_tool.run({"path": str(tmp_path / "ghost.txt")})) +async def test_read_file_missing_file(read_tool: ReadFileTool, tmp_path) -> None: + result = await read_tool.run({"path": str(tmp_path / "ghost.txt")}) assert result.success is False assert str(tmp_path / "ghost.txt") in result.error -def test_read_file_permission_error(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_permission_error(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "secret.txt" f.write_text("secret", encoding="utf-8") with patch("pathlib.Path.read_text", side_effect=PermissionError("permission denied")): - result = 
asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is False assert result.error is not None -def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "binary.bin" f.write_bytes(b"\xff\xfe\x00binary") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is False assert result.error is not None @@ -71,23 +70,23 @@ def test_read_file_binary_file(read_tool: ReadFileTool, tmp_path) -> None: (100, None, ""), # offset beyond EOF ], ) -def test_read_file_with_offset_and_limit(read_tool: ReadFileTool, tmp_path, offset, limit, expected) -> None: +async def test_read_file_with_offset_and_limit(read_tool: ReadFileTool, tmp_path, offset, limit, expected) -> None: f = tmp_path / "file.txt" f.write_text("a\nb\nc\n", encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f), "offset": offset, "limit": limit})) + result = await read_tool.run({"path": str(f), "offset": offset, "limit": limit}) assert result.success is True assert result.data == expected -def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None: from ddev.ai.tools.core.truncation import MAX_CHARS f = tmp_path / "large.txt" f.write_text("x" * (MAX_CHARS + 1000), encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is True assert result.truncated is True @@ -95,11 +94,11 @@ def test_read_file_truncated(read_tool: ReadFileTool, tmp_path) -> None: assert result.hint is not None -def test_read_file_no_trailing_newline(read_tool: ReadFileTool, tmp_path) -> None: +async def test_read_file_no_trailing_newline(read_tool: ReadFileTool, tmp_path) -> None: f = tmp_path / "file.txt" 
f.write_text("no newline at end", encoding="utf-8") - result = asyncio.run(read_tool.run({"path": str(f)})) + result = await read_tool.run({"path": str(f)}) assert result.success is True assert result.data == "0: no newline at end" diff --git a/ddev/tests/ai/tools/fs/test_workflow.py b/ddev/tests/ai/tools/fs/test_workflow.py index 077f63189bf91..a45ad9d937e26 100644 --- a/ddev/tests/ai/tools/fs/test_workflow.py +++ b/ddev/tests/ai/tools/fs/test_workflow.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from ddev.ai.tools.fs.append_file import AppendFileTool from ddev.ai.tools.fs.create_file import CreateFileTool @@ -10,7 +9,7 @@ from ddev.ai.tools.fs.read_file import ReadFileTool -def test_workflow_create_read_edit_append( +async def test_workflow_create_read_edit_append( create_tool: CreateFileTool, read_tool: ReadFileTool, edit_tool: EditFileTool, @@ -21,20 +20,20 @@ def test_workflow_create_read_edit_append( f = tmp_path / "workflow.txt" # Step 1: create - r = asyncio.run(create_tool.run({"path": str(f), "content": "version: 1\n"})) + r = await create_tool.run({"path": str(f), "content": "version: 1\n"}) assert r.success is True # Step 2: read (registers current content) - r = asyncio.run(read_tool.run({"path": str(f)})) + r = await read_tool.run({"path": str(f)}) assert r.success is True # Step 3: edit - r = asyncio.run(edit_tool.run({"path": str(f), "old_string": "version: 1", "new_string": "version: 2"})) + r = await edit_tool.run({"path": str(f), "old_string": "version: 1", "new_string": "version: 2"}) assert r.success is True assert "version: 2" in f.read_text(encoding="utf-8") # Step 4: append - r = asyncio.run(append_tool.run({"path": str(f), "content": "# updated\n"})) + r = await append_tool.run({"path": str(f), "content": "# updated\n"}) assert r.success is True assert f.read_text(encoding="utf-8").endswith("# updated\n") @@ -42,22 +41,22 @@ def 
test_workflow_create_read_edit_append( assert registry.verify(str(f), f.read_text(encoding="utf-8")) is True -def test_workflow_stale_file( +async def test_workflow_stale_file( create_tool: CreateFileTool, read_tool: ReadFileTool, edit_tool: EditFileTool, tmp_path, ) -> None: f = tmp_path / "shared.txt" - asyncio.run(create_tool.run({"path": str(f), "content": "original\n"})) + await create_tool.run({"path": str(f), "content": "original\n"}) f.write_text("updated externally\n", encoding="utf-8") - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "original", "new_string": "my edit"})) + result = await edit_tool.run({"path": str(f), "old_string": "original", "new_string": "my edit"}) assert result.success is False assert "Re-read and retry" in result.error - asyncio.run(read_tool.run({"path": str(f)})) + await read_tool.run({"path": str(f)}) - result = asyncio.run(edit_tool.run({"path": str(f), "old_string": "updated externally", "new_string": "final"})) + result = await edit_tool.run({"path": str(f), "old_string": "updated externally", "new_string": "final"}) assert result.success is True assert f.read_text(encoding="utf-8") == "final\n" diff --git a/ddev/tests/ai/tools/http/test_http_get.py b/ddev/tests/ai/tools/http/test_http_get.py index d2e8c06220fa1..2cb871bdfd62a 100644 --- a/ddev/tests/ai/tools/http/test_http_get.py +++ b/ddev/tests/ai/tools/http/test_http_get.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import AsyncMock, MagicMock, patch import httpx @@ -51,8 +50,8 @@ def test_tool_meta(http_tool: HttpGetTool) -> None: @pytest.mark.parametrize("url", ["ftp://example.com", "example.com", "", "//example.com"]) -def test_invalid_url(http_tool: HttpGetTool, url: str) -> None: - result = asyncio.run(http_tool.run({"url": url})) +async def test_invalid_url(http_tool: HttpGetTool, url: str) -> None: + result = await http_tool.run({"url": url}) assert result.success is False assert "http" in result.error and "https" in result.error @@ -71,9 +70,9 @@ def test_invalid_url(http_tool: HttpGetTool, url: str) -> None: (204, ""), ], ) -def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> None: +async def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> None: with patch_httpx(fake_response(status_code, body)): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is True assert f"Status: {status_code}" in result.data @@ -81,9 +80,9 @@ def test_request_success(http_tool: HttpGetTool, status_code: int, body: str) -> @pytest.mark.parametrize("status_code", [400, 404, 500, 503]) -def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> None: +async def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> None: with patch_httpx(fake_response(status_code, "error body")): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is True assert f"Status: {status_code}" in result.data @@ -94,17 +93,17 @@ def test_request_non_success_status(http_tool: HttpGetTool, status_code: int) -> # 
--------------------------------------------------------------------------- -def test_request_timeout(http_tool: HttpGetTool) -> None: +async def test_request_timeout(http_tool: HttpGetTool) -> None: with patch_httpx(side_effect=httpx.TimeoutException("timed out")): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics", "timeout": 1.0})) + result = await http_tool.run({"url": "http://localhost:9090/metrics", "timeout": 1.0}) assert result.success is False assert "timed out after 1.0s" in result.error -def test_request_error(http_tool: HttpGetTool) -> None: +async def test_request_error(http_tool: HttpGetTool) -> None: with patch_httpx(side_effect=httpx.RequestError("connection refused")): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is False assert "Request failed" in result.error @@ -116,12 +115,12 @@ def test_request_error(http_tool: HttpGetTool) -> None: @pytest.mark.parametrize("status_code", [200, 500]) -def test_response_truncated(http_tool: HttpGetTool, status_code: int) -> None: +async def test_response_truncated(http_tool: HttpGetTool, status_code: int) -> None: from ddev.ai.tools.core.truncation import MAX_CHARS large_body = "x" * (MAX_CHARS + 1000) with patch_httpx(fake_response(status_code, large_body)): - result = asyncio.run(http_tool.run({"url": "http://localhost:9090/metrics"})) + result = await http_tool.run({"url": "http://localhost:9090/metrics"}) assert result.success is True assert result.truncated is True diff --git a/ddev/tests/ai/tools/shell/test_base.py b/ddev/tests/ai/tools/shell/test_base.py index 5d7431239a5e7..3568170b9092d 100644 --- a/ddev/tests/ai/tools/shell/test_base.py +++ b/ddev/tests/ai/tools/shell/test_base.py @@ -79,42 +79,42 @@ def slow_greet_tool() -> SlowGreetTool: # --------------------------------------------------------------------------- -def 
test_run_command_success(proc): +async def test_run_command_success(proc): with patch_proc(proc): - result = asyncio.run(run_command(["echo", "hello"])) + result = await run_command(["echo", "hello"]) assert result.success is True assert result.data == "hello\n" assert result.truncated is False -def test_run_command_failure_combines_stdout_and_stderr(): +async def test_run_command_failure_combines_stdout_and_stderr(): proc = make_proc(returncode=1, stdout=b"partial\n", stderr=b"error\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is False assert "partial" in result.data assert "error" in result.data -def test_run_command_failure_stderr_only_when_no_stdout(): +async def test_run_command_failure_stderr_only_when_no_stdout(): proc = make_proc(returncode=1, stdout=b"", stderr=b"fatal error\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is False and result.data == "fatal error\n" -def test_run_command_ignores_stderr_on_zero_exit(): +async def test_run_command_ignores_stderr_on_zero_exit(): proc = make_proc(returncode=0, stdout=b"out\n", stderr=b"warning\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is True assert "warning" not in result.data -def test_run_command_stderr_included_when_stdout_empty_on_success(): +async def test_run_command_stderr_included_when_stdout_empty_on_success(): proc = make_proc(returncode=0, stdout=b"", stderr=b"info: initialized\n") with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is True assert result.data == "info: initialized\n" @@ -127,10 +127,10 @@ def test_run_command_stderr_included_when_stdout_empty_on_success(): (1, b"", b""), ], ) -def test_run_command_empty_output(returncode, stdout, stderr): +async def 
test_run_command_empty_output(returncode, stdout, stderr): proc = make_proc(returncode=returncode, stdout=stdout, stderr=stderr) with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.data == "(no output)" @@ -139,27 +139,27 @@ def test_run_command_empty_output(returncode, stdout, stderr): # --------------------------------------------------------------------------- -def test_run_command_not_found(): +async def test_run_command_not_found(): with patch("asyncio.create_subprocess_exec", side_effect=FileNotFoundError()): - result = asyncio.run(run_command(["nonexistent"])) + result = await run_command(["nonexistent"]) assert result.success is False assert "Command not found" in result.error assert "nonexistent" in result.error -def test_run_command_timeout(): +async def test_run_command_timeout(): proc = make_proc() with patch_proc(proc): with patch("asyncio.wait_for", new=_raise_timeout): - result = asyncio.run(run_command(["sleep", "100"], timeout=5)) + result = await run_command(["sleep", "100"], timeout=5) assert result.success is False assert "5s" in result.error proc.kill.assert_called_once() -def test_run_command_unexpected_exception(): +async def test_run_command_unexpected_exception(): with patch("asyncio.create_subprocess_exec", side_effect=OSError("permission denied")): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.success is False assert "OSError" in result.error assert "permission denied" in result.error @@ -170,21 +170,21 @@ def test_run_command_unexpected_exception(): # --------------------------------------------------------------------------- -def test_run_command_truncation(): +async def test_run_command_truncation(): large = ("x" * 80 + "\n") * 700 proc = make_proc(stdout=large.encode()) with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.truncated is True assert 
result.total_size == len(large) assert result.shown_size == len(result.data) assert result.hint is not None -def test_run_command_no_truncation_at_limit(): +async def test_run_command_no_truncation_at_limit(): proc = make_proc(stdout=("x" * MAX_CHARS).encode()) with patch_proc(proc): - result = asyncio.run(run_command(["cmd"])) + result = await run_command(["cmd"]) assert result.truncated is False assert result.total_size is None assert result.hint is None @@ -200,10 +200,10 @@ def test_cmd_tool_timeouts(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool assert SlowGreetTool.timeout == 60 # custom timeout -def test_cmd_tool_dispatches_with_correct_timeout(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool): +async def test_cmd_tool_dispatches_with_correct_timeout(greet_tool: GreetTool, slow_greet_tool: SlowGreetTool): for tool, expected_timeout in [(greet_tool, 10), (slow_greet_tool, 60)]: with patch( "ddev.ai.tools.shell.base.run_command", new=AsyncMock(return_value=ToolResult(success=True)) ) as mock_run: - asyncio.run(tool.run({"name": "world"})) + await tool.run({"name": "world"}) mock_run.assert_called_once_with(["echo", "hello world"], timeout=expected_timeout) diff --git a/ddev/tests/ai/tools/shell/test_tools.py b/ddev/tests/ai/tools/shell/test_tools.py index 81fcb45d3d3b1..05084acc97e9e 100644 --- a/ddev/tests/ai/tools/shell/test_tools.py +++ b/ddev/tests/ai/tools/shell/test_tools.py @@ -1,7 +1,6 @@ # (C) Datadog, Inc. 
2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import asyncio from unittest.mock import AsyncMock, patch import pytest @@ -66,12 +65,12 @@ def test_grep_cmd_pattern_and_path_placement(grep_tool: GrepTool): assert cmd[-1] == "/my dir/sub dir" -def test_grep_no_matches_returns_success(grep_tool: GrepTool): +async def test_grep_no_matches_returns_success(grep_tool: GrepTool): from ddev.ai.tools.core.types import ToolResult no_match_result = ToolResult(success=False, data="(no output)", error=None) with patch("ddev.ai.tools.shell.grep.run_command", new=AsyncMock(return_value=no_match_result)): - result = asyncio.run(grep_tool(GrepInput(pattern="nomatch", path="/tmp"))) + result = await grep_tool(GrepInput(pattern="nomatch", path="/tmp")) assert result.success is True assert result.data == "(no output)"