Merged
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.0.3]

### Added

- History API to read received requests

## [0.0.2]

### Added
19 changes: 19 additions & 0 deletions README.md
@@ -9,6 +9,7 @@ OpenAI-compatible mock server for testing LLM integrations.
## Features

- OpenAI API compatibility with key endpoints (`/models`, `/chat/completions`, `/responses`)
- **Request history** — inspect all received requests via `GET /history` and clear them with `DELETE /history` (no auth required)
- Default mirror strategy (echoes input as output)
- **Tool calling support** — trigger phrase–driven tool call responses when `tools` are present in the request using `call tool '<name>' with '<json>'`
- **Error simulation** — trigger phrase–driven error responses using `raise error <json>` in the last user message
@@ -62,6 +63,24 @@ uv run uvicorn llmock.app:app --host 0.0.0.0 --port 8000 --reload

The server will be available at `http://localhost:8000`. Health check available at `/health`.

## Request History

Two utility endpoints (no authentication required) let tests inspect what the server received:

| Method | Path | Description |
|--------|------|-------------|
| `GET` | `/history` | Returns all recorded requests in order (`{ "requests": [...] }`) |
| `DELETE` | `/history` | Clears the history (returns `204 No Content`) |

Each entry in `requests` contains `method`, `path`, `body`, and `timestamp`.

```bash
# See what requests were received
curl http://localhost:8000/history

# Reset between test runs
curl -X DELETE http://localhost:8000/history
```

## Configuration

2 changes: 2 additions & 0 deletions docs/ARCHITECTURE.md
@@ -6,6 +6,8 @@

Mock server implementing OpenAI's `/models`, `/chat/completions`, and `/responses` endpoints. Default behavior: echo input as output (MirrorStrategy). Pluggable strategy system for custom behaviors.

Includes **request history endpoints** (`GET /history`, `DELETE /history`) that allow tests to inspect and reset the list of received requests without authentication.

**Spec Reference**: Follow [OpenAI API Reference](https://platform.openai.com/docs/api-reference) exactly.
**OpenAPI Spec**: https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml

32 changes: 29 additions & 3 deletions src/llmock/app.py
@@ -8,8 +8,9 @@
from fastapi.responses import JSONResponse
from starlette.middleware.base import BaseHTTPMiddleware

from llmock import history_store
from llmock.config import Config, get_config
from llmock.routers import chat, health, models, responses
from llmock.routers import chat, health, history, models, responses


class APIKeyMiddleware(BaseHTTPMiddleware):
@@ -26,8 +27,8 @@ async def dispatch(self, request: Request, call_next):
if request.method == "OPTIONS":
return await call_next(request)

# Skip auth for health endpoint
if request.url.path == "/health":
# Skip auth for health and history endpoints
if request.url.path in ("/health", "/history"):
return await call_next(request)

config_api_key = self.config.get("api-key")
@@ -54,6 +55,27 @@ async def dispatch(self, request: Request, call_next):
return await call_next(request)


# Paths that should not be recorded in the history
_HISTORY_SKIP_PATHS = {"/health", "/history"}


class HistoryRecordingMiddleware(BaseHTTPMiddleware):
"""Middleware to record all incoming API requests into the history store."""

async def dispatch(self, request: Request, call_next):
"""Record the request body then forward the request."""
if request.method != "OPTIONS" and request.url.path not in _HISTORY_SKIP_PATHS:
body = await request.body() # caches body for downstream handlers
parsed_body = None
if body:
try:
parsed_body = json.loads(body)
except (json.JSONDecodeError, ValueError):
parsed_body = body.decode(errors="replace")
history_store.add_entry(request.method, request.url.path, parsed_body)
return await call_next(request)


class DebugLoggingMiddleware(BaseHTTPMiddleware):
"""Middleware to pretty-print incoming request bodies when debug mode is enabled."""

@@ -97,12 +119,16 @@ def create_app(config: Config = get_config()) -> FastAPI:
# Add API key middleware
app.add_middleware(APIKeyMiddleware, config=config)

# Add history recording middleware (runs before auth so all requests are captured)
app.add_middleware(HistoryRecordingMiddleware)

# Add debug logging middleware (outermost, runs before auth)
if config.get("debug"):
app.add_middleware(DebugLoggingMiddleware, config=config)

# Include routers
app.include_router(health.router)
app.include_router(history.router)
app.include_router(models.router)
app.include_router(chat.router)
app.include_router(responses.router)
28 changes: 28 additions & 0 deletions src/llmock/history_store.py
@@ -0,0 +1,28 @@
"""In-memory store for recording incoming API requests."""

from datetime import UTC, datetime
from typing import Any

_history: list[dict[str, Any]] = []


def add_entry(method: str, path: str, body: Any) -> None:
"""Append a request entry to the history."""
_history.append(
{
"method": method,
"path": path,
"body": body,
"timestamp": datetime.now(UTC).isoformat(),
}
)


def get_all() -> list[dict[str, Any]]:
"""Return all recorded request entries in order."""
return list(_history)


def reset() -> None:
"""Clear all recorded request entries."""
_history.clear()
28 changes: 28 additions & 0 deletions src/llmock/routers/history.py
@@ -0,0 +1,28 @@
"""History endpoints — no authentication required."""

from typing import Any

from fastapi import APIRouter
from pydantic import BaseModel

from llmock import history_store

router = APIRouter(prefix="", tags=["history"])


class HistoryResponse(BaseModel):
"""Response model for the history endpoint."""

requests: list[dict[str, Any]]


@router.get("/history", response_model=HistoryResponse)
async def get_history() -> HistoryResponse:
"""Return all received requests in the order they were received."""
return HistoryResponse(requests=history_store.get_all())


@router.delete("/history", status_code=204)
async def reset_history() -> None:
"""Clear the recorded request history."""
history_store.reset()
20 changes: 3 additions & 17 deletions src/llmock/strategies/strategy_content_mirror.py
@@ -26,43 +26,29 @@ class ChatMirrorStrategy:
def __init__(self, config: dict[str, Any]) -> None:
pass

# Roles that can produce a mirror response, in priority order.
# The reversed message list is scanned and the first message whose role
# appears here determines the response.
_MIRROR_ROLES = ("tool", "user")

def generate_response(
self, request: ChatCompletionRequest
) -> list[StrategyResponse]:
"""Return a response based on the most recent message with a mirror role.

Scans the message list in reverse and returns a response for the first
message whose role is in ``_MIRROR_ROLES``:

- ``"tool"`` → ``"last tool call result is <content>"``
- ``"user"`` → echoes the message content
"""Return a response echoing the last user message.

Args:
request: The chat completion request containing messages.

Returns:
A single-item list with a text StrategyResponse, or a default
message when no qualifying message is found.
message when no user message is found.
"""
last = next(
(
msg
for msg in reversed(request.messages)
if msg.role in self._MIRROR_ROLES and extract_text_content(msg.content)
if msg.role == "user" and extract_text_content(msg.content)
),
None,
)
if last is None:
return [text_response("No user message provided.")]
content = extract_text_content(last.content) or ""
if last.role == "tool":
return [text_response(f"last tool call result is {content}")]
# role == "user"
return (
[text_response(content)]
if content
22 changes: 19 additions & 3 deletions src/llmock/strategies/strategy_tool_call.py
@@ -25,11 +25,12 @@
from typing import Any

from llmock.schemas.chat import ChatCompletionRequest
from llmock.schemas.responses import ResponseCreateRequest
from llmock.strategies.base import StrategyResponse, tool_response
from llmock.schemas.responses import FunctionCallOutputItem, ResponseCreateRequest
from llmock.strategies.base import StrategyResponse, text_response, tool_response
from llmock.utils.chat import (
extract_last_user_text_chat,
extract_last_user_text_response,
extract_text_content,
)

# Matches: call tool '<name>' with '<args>'
@@ -94,6 +95,20 @@ def generate_response(
(msg.role for msg in reversed(request.messages) if msg.role != "system"),
None,
)
if last_role == "tool":
content = next(
(
extract_text_content(msg.content)
for msg in reversed(request.messages)
if msg.role == "tool"
),
None,
)
return (
[text_response(f"last tool call result is {content}")]
if content
else []
)
if last_role != "user":
return []
text = extract_last_user_text_chat(request)
@@ -133,9 +148,10 @@ def generate_response(
last_item = request.input[-1] if request.input else None
if last_item is None:
return []
if isinstance(last_item, FunctionCallOutputItem):
return [text_response(f"last tool call result is {last_item.output}")]
last_role = getattr(last_item, "role", None)
if last_role != "user":
# Covers FunctionCallOutputItem (no role) and assistant items.
return []
text = extract_last_user_text_response(request)
if text is None:
24 changes: 12 additions & 12 deletions tests/test_chat_tool_calls.py
@@ -240,11 +240,11 @@ async def test_tool_call_non_streaming(raw_client: httpx.AsyncClient) -> None:
async def test_tool_call_does_not_fire_when_last_message_is_tool_result(
raw_client: httpx.AsyncClient,
) -> None:
"""In an agentic loop the ToolCallStrategy must NOT re-trigger on cycle 2+.
"""In an agentic loop ToolCallStrategy returns the tool result as a text response.

History: user(trigger) → assistant(tool_call) → tool(result)
The last non-system message is 'tool', so the strategy should return []
and NOT produce another tool call response.
The last non-system message is 'tool', so the strategy should return a text
response with the tool result content instead of another tool call.
"""
response = await raw_client.post(
"/chat/completions",
@@ -282,11 +282,12 @@ async def test_tool_call_does_not_fire_when_last_message_is_tool_result(

assert response.status_code == 200
data = response.json()
# ToolCallStrategy must NOT fire — the trigger was already processed.
# With only ToolCallStrategy in the chain, it falls through and produces [].
assert data["choices"] == [], (
"ToolCallStrategy should not re-fire when the last message is a tool result"
)
# ToolCallStrategy returns the tool result as a text response, not another tool call.
assert len(data["choices"]) == 1
choice = data["choices"][0]
assert choice["finish_reason"] == "stop"
assert choice["message"]["content"] == "last tool call result is 4"
assert choice["message"].get("tool_calls") is None


async def test_tool_call_does_not_fire_when_last_message_is_assistant(
@@ -551,8 +552,8 @@ async def test_full_agentic_loop_mirrors_user_message_after_tool_result() -> None:

With the default composition [ErrorStrategy, ToolCallStrategy, MirrorStrategy]:
- ErrorStrategy: no "raise error" phrase → returns []
- ToolCallStrategy: last non-system message is "tool", not "user" → returns []
- MirrorStrategy: echoes the last *user* message as a plain text response
- ToolCallStrategy: last non-system message is "tool" → returns "last tool call result is 4"
- MirrorStrategy: not reached

Expected: a single assistant text choice whose content equals the original
user message. No tool_calls in the second-turn response.
@@ -610,8 +611,7 @@ async def test_full_agentic_loop_mirrors_user_message_after_tool_result() -> None:
assert response.status_code == 200
data = response.json()

# ToolCallStrategy does not re-trigger: last non-system message is "tool"
# MirrorStrategy kicks in and returns the tool result with a prefix
# ToolCallStrategy handles the tool result and returns a text response
assert len(data["choices"]) == 1
choice = data["choices"][0]
assert choice["finish_reason"] == "stop"