diff --git a/CHANGELOG.md b/CHANGELOG.md index ed046b5..7e99102 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.0.3] + +### Added + +- History API to read received requests + ## [0.0.2] ### Added diff --git a/README.md b/README.md index ecd4541..22e0480 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ OpenAI-compatible mock server for testing LLM integrations. ## Features - OpenAI API compatibility with key endpoints (`/models`, `/chat/completions`, `/responses`) +- **Request history** — inspect all received requests via `GET /history` and clear them with `DELETE /history` (no auth required) - Default mirror strategy (echoes input as output) - **Tool calling support** — trigger phrase–driven tool call responses when `tools` are present in the request using `call tool '' with ''` - **Error simulation** — trigger phrase–driven error responses using `raise error ` in the last user message @@ -62,6 +63,24 @@ uv run uvicorn llmock.app:app --host 0.0.0.0 --port 8000 --reload The server will be available at `http://localhost:8000`. Health check available at `/health`. +## Request History + +Two utility endpoints (no authentication required) let tests inspect what the server received: + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/history` | Returns all recorded requests in order (`{ "requests": [...] }`) | +| `DELETE` | `/history` | Clears the history (returns `204 No Content`) | + +Each entry in `requests` contains `method`, `path`, `body`, and `timestamp`. 
+ +```bash +# See what requests were received +curl http://localhost:8000/history + +# Reset between test runs +curl -X DELETE http://localhost:8000/history +``` ## Configuration diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index b39a9cc..22ba74f 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -6,6 +6,8 @@ Mock server implementing OpenAI's `/models`, `/chat/completions`, and `/responses` endpoints. Default behavior: echo input as output (MirrorStrategy). Pluggable strategy system for custom behaviors. +Includes **request history endpoints** (`GET /history`, `DELETE /history`) that allow tests to inspect and reset the list of received requests without authentication. + **Spec Reference**: Follow [OpenAI API Reference](https://platform.openai.com/docs/api-reference) exactly. **OpenAPI Spec**: https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml diff --git a/src/llmock/app.py b/src/llmock/app.py index 199b320..ac00d6f 100644 --- a/src/llmock/app.py +++ b/src/llmock/app.py @@ -8,8 +8,9 @@ from fastapi.responses import JSONResponse from starlette.middleware.base import BaseHTTPMiddleware +from llmock import history_store from llmock.config import Config, get_config -from llmock.routers import chat, health, models, responses +from llmock.routers import chat, health, history, models, responses class APIKeyMiddleware(BaseHTTPMiddleware): @@ -26,8 +27,8 @@ async def dispatch(self, request: Request, call_next): if request.method == "OPTIONS": return await call_next(request) - # Skip auth for health endpoint - if request.url.path == "/health": + # Skip auth for health and history endpoints + if request.url.path in ("/health", "/history"): return await call_next(request) config_api_key = self.config.get("api-key") @@ -54,6 +55,27 @@ async def dispatch(self, request: Request, call_next): return await call_next(request) +# Paths that should not be recorded in the history +_HISTORY_SKIP_PATHS = {"/health", "/history"} + + +class 
HistoryRecordingMiddleware(BaseHTTPMiddleware): + """Middleware to record all incoming API requests into the history store.""" + + async def dispatch(self, request: Request, call_next): + """Record the request body then forward the request.""" + if request.method != "OPTIONS" and request.url.path not in _HISTORY_SKIP_PATHS: + body = await request.body()  # caches body for downstream handlers + parsed_body = None + if body: + try: + parsed_body = json.loads(body) + except (json.JSONDecodeError, ValueError): + parsed_body = body.decode(errors="replace") + history_store.add_entry(request.method, request.url.path, parsed_body) + return await call_next(request) + + class DebugLoggingMiddleware(BaseHTTPMiddleware): """Middleware to pretty-print incoming request bodies when debug mode is enabled.""" @@ -97,12 +119,16 @@ def create_app(config: Config = get_config()) -> FastAPI: # Add API key middleware app.add_middleware(APIKeyMiddleware, config=config) + # Add history recording middleware (records before auth so all requests captured) + app.add_middleware(HistoryRecordingMiddleware) + # Add debug logging middleware (outermost, runs before auth) if config.get("debug"): app.add_middleware(DebugLoggingMiddleware, config=config) # Include routers app.include_router(health.router) + app.include_router(history.router) app.include_router(models.router) app.include_router(chat.router) app.include_router(responses.router) diff --git a/src/llmock/history_store.py b/src/llmock/history_store.py new file mode 100644 index 0000000..71e777b --- /dev/null +++ b/src/llmock/history_store.py @@ -0,0 +1,28 @@ +"""In-memory store for recording incoming API requests.""" + +from datetime import UTC, datetime +from typing import Any + +_history: list[dict[str, Any]] = [] + + +def add_entry(method: str, path: str, body: Any) -> None: + """Append a request entry to the history.""" + _history.append( + { + "method": method, + "path": path, + "body": body, + "timestamp": datetime.now(UTC).isoformat(), 
+ } + ) + + +def get_all() -> list[dict[str, Any]]: + """Return all recorded request entries in order.""" + return list(_history) + + +def reset() -> None: + """Clear all recorded request entries.""" + _history.clear() diff --git a/src/llmock/routers/history.py b/src/llmock/routers/history.py new file mode 100644 index 0000000..2871706 --- /dev/null +++ b/src/llmock/routers/history.py @@ -0,0 +1,28 @@ +"""History endpoints — no authentication required.""" + +from typing import Any + +from fastapi import APIRouter +from pydantic import BaseModel + +from llmock import history_store + +router = APIRouter(prefix="", tags=["history"]) + + +class HistoryResponse(BaseModel): + """Response model for the history endpoint.""" + + requests: list[dict[str, Any]] + + +@router.get("/history", response_model=HistoryResponse) +async def get_history() -> HistoryResponse: + """Return all received requests in the order they were received.""" + return HistoryResponse(requests=history_store.get_all()) + + +@router.delete("/history", status_code=204) +async def reset_history() -> None: + """Clear the recorded request history.""" + history_store.reset() diff --git a/src/llmock/strategies/strategy_content_mirror.py b/src/llmock/strategies/strategy_content_mirror.py index 85d0cb6..8c46fe8 100644 --- a/src/llmock/strategies/strategy_content_mirror.py +++ b/src/llmock/strategies/strategy_content_mirror.py @@ -26,43 +26,29 @@ class ChatMirrorStrategy: def __init__(self, config: dict[str, Any]) -> None: pass - # Roles that can produce a mirror response, in priority order. - # The reversed message list is scanned and the first message whose role - # appears here determines the response. - _MIRROR_ROLES = ("tool", "user") - def generate_response( self, request: ChatCompletionRequest ) -> list[StrategyResponse]: - """Return a response based on the most recent message with a mirror role. 
- - Scans the message list in reverse and returns a response for the first - message whose role is in ``_MIRROR_ROLES``: - - - ``"tool"`` → ``"last tool call result is "`` - - ``"user"`` → echoes the message content + """Return a response echoing the last user message. Args: request: The chat completion request containing messages. Returns: A single-item list with a text StrategyResponse, or a default - message when no qualifying message is found. + message when no user message is found. """ last = next( ( msg for msg in reversed(request.messages) - if msg.role in self._MIRROR_ROLES and extract_text_content(msg.content) + if msg.role == "user" and extract_text_content(msg.content) ), None, ) if last is None: return [text_response("No user message provided.")] content = extract_text_content(last.content) or "" - if last.role == "tool": - return [text_response(f"last tool call result is {content}")] - # role == "user" return ( [text_response(content)] if content diff --git a/src/llmock/strategies/strategy_tool_call.py b/src/llmock/strategies/strategy_tool_call.py index 9f510cf..516464b 100644 --- a/src/llmock/strategies/strategy_tool_call.py +++ b/src/llmock/strategies/strategy_tool_call.py @@ -25,11 +25,12 @@ from typing import Any from llmock.schemas.chat import ChatCompletionRequest -from llmock.schemas.responses import ResponseCreateRequest -from llmock.strategies.base import StrategyResponse, tool_response +from llmock.schemas.responses import FunctionCallOutputItem, ResponseCreateRequest +from llmock.strategies.base import StrategyResponse, text_response, tool_response from llmock.utils.chat import ( extract_last_user_text_chat, extract_last_user_text_response, + extract_text_content, ) # Matches: call tool '' with '' @@ -94,6 +95,20 @@ def generate_response( (msg.role for msg in reversed(request.messages) if msg.role != "system"), None, ) + if last_role == "tool": + content = next( + ( + extract_text_content(msg.content) + for msg in reversed(request.messages) 
+ if msg.role == "tool" + ), + None, + ) + return ( + [text_response(f"last tool call result is {content}")] + if content + else [] + ) if last_role != "user": return [] text = extract_last_user_text_chat(request) @@ -133,9 +148,10 @@ def generate_response( last_item = request.input[-1] if request.input else None if last_item is None: return [] + if isinstance(last_item, FunctionCallOutputItem): + return [text_response(f"last tool call result is {last_item.output}")] last_role = getattr(last_item, "role", None) if last_role != "user": - # Covers FunctionCallOutputItem (no role) and assistant items. return [] text = extract_last_user_text_response(request) if text is None: diff --git a/tests/test_chat_tool_calls.py b/tests/test_chat_tool_calls.py index 5cc0a70..af7b896 100644 --- a/tests/test_chat_tool_calls.py +++ b/tests/test_chat_tool_calls.py @@ -240,11 +240,11 @@ async def test_tool_call_non_streaming(raw_client: httpx.AsyncClient) -> None: async def test_tool_call_does_not_fire_when_last_message_is_tool_result( raw_client: httpx.AsyncClient, ) -> None: - """In an agentic loop the ToolCallStrategy must NOT re-trigger on cycle 2+. + """In an agentic loop ToolCallStrategy returns the tool result as a text response. History: user(trigger) → assistant(tool_call) → tool(result) - The last non-system message is 'tool', so the strategy should return [] - and NOT produce another tool call response. + The last non-system message is 'tool', so the strategy should return a text + response with the tool result content instead of another tool call. """ response = await raw_client.post( "/chat/completions", @@ -282,11 +282,12 @@ async def test_tool_call_does_not_fire_when_last_message_is_tool_result( assert response.status_code == 200 data = response.json() - # ToolCallStrategy must NOT fire — the trigger was already processed. - # With only ToolCallStrategy in the chain, it falls through and produces []. 
- assert data["choices"] == [], ( - "ToolCallStrategy should not re-fire when the last message is a tool result" - ) + # ToolCallStrategy returns the tool result as a text response, not another tool call. + assert len(data["choices"]) == 1 + choice = data["choices"][0] + assert choice["finish_reason"] == "stop" + assert choice["message"]["content"] == "last tool call result is 4" + assert choice["message"].get("tool_calls") is None async def test_tool_call_does_not_fire_when_last_message_is_assistant( @@ -551,8 +552,8 @@ async def test_full_agentic_loop_mirrors_user_message_after_tool_result() -> Non With the default composition [ErrorStrategy, ToolCallStrategy, MirrorStrategy]: - ErrorStrategy: no "raise error" phrase → returns [] - - ToolCallStrategy: last non-system message is "tool", not "user" → returns [] - - MirrorStrategy: echoes the last *user* message as a plain text response + - ToolCallStrategy: last non-system message is "tool" → returns "last tool call result is 4" + - MirrorStrategy: not reached Expected: a single assistant text choice whose content equals the original user message. No tool_calls in the second-turn response. 
@@ -610,8 +611,7 @@ async def test_full_agentic_loop_mirrors_user_message_after_tool_result() -> Non assert response.status_code == 200 data = response.json() - # ToolCallStrategy does not re-trigger: last non-system message is "tool" - # MirrorStrategy kicks in and returns the tool result with a prefix + # ToolCallStrategy handles the tool result and returns a text response assert len(data["choices"]) == 1 choice = data["choices"][0] assert choice["finish_reason"] == "stop" diff --git a/tests/test_history.py b/tests/test_history.py new file mode 100644 index 0000000..7532967 --- /dev/null +++ b/tests/test_history.py @@ -0,0 +1,136 @@ +"""Tests for the /history endpoints.""" + +import pytest +from httpx import AsyncClient + +from llmock import history_store + + +@pytest.fixture(autouse=True) +def reset_history() -> None: + """Ensure history is empty before and after every test.""" + history_store.reset() + yield + history_store.reset() + + +async def test_history_initially_empty(client: AsyncClient) -> None: + """GET /history returns an empty list when no requests have been made.""" + response = await client.get("/history") + assert response.status_code == 200 + assert response.json() == {"requests": []} + + +async def test_history_records_chat_request(client: AsyncClient) -> None: + """A chat completion request appears in the history.""" + await client.post( + "/chat/completions", + json={"model": "gpt-4o", "messages": [{"role": "user", "content": "Hello"}]}, + ) + + response = await client.get("/history") + assert response.status_code == 200 + data = response.json() + assert len(data["requests"]) == 1 + entry = data["requests"][0] + assert entry["method"] == "POST" + assert entry["path"] == "/chat/completions" + assert entry["body"]["model"] == "gpt-4o" + + +async def test_history_preserves_order(client: AsyncClient) -> None: + """Multiple requests are stored in the order they were received.""" + await client.post( + "/chat/completions", + json={"model": 
"gpt-4o", "messages": [{"role": "user", "content": "first"}]}, + ) + await client.post( + "/chat/completions", + json={"model": "gpt-4o", "messages": [{"role": "user", "content": "second"}]}, + ) + + response = await client.get("/history") + data = response.json() + assert len(data["requests"]) == 2 + assert data["requests"][0]["body"]["messages"][0]["content"] == "first" + assert data["requests"][1]["body"]["messages"][0]["content"] == "second" + + +async def test_history_does_not_record_history_get(client: AsyncClient) -> None: + """GET /history itself is not recorded in the history.""" + await client.get("/history") + await client.get("/history") + + response = await client.get("/history") + assert response.json() == {"requests": []} + + +async def test_history_does_not_record_health(client: AsyncClient) -> None: + """Health check calls are not recorded in the history.""" + await client.get("/health") + + response = await client.get("/history") + assert response.json() == {"requests": []} + + +async def test_history_reset_clears_entries(client: AsyncClient) -> None: + """DELETE /history removes all entries and returns 204.""" + await client.post( + "/chat/completions", + json={"model": "gpt-4o", "messages": [{"role": "user", "content": "Hi"}]}, + ) + + reset_response = await client.delete("/history") + assert reset_response.status_code == 204 + + history_response = await client.get("/history") + assert history_response.json() == {"requests": []} + + +async def test_history_reset_does_not_require_auth(client: AsyncClient) -> None: + """DELETE /history works without an Authorization header.""" + # Use a raw client without the auth header + from httpx import ASGITransport, AsyncClient as RawClient + + from llmock.app import create_app + from llmock.config import get_config + + config = {"models": [], "api-key": "secret"} + app = create_app(config=config) + app.dependency_overrides[get_config] = lambda: config + + transport = ASGITransport(app=app) + async with 
RawClient(transport=transport, base_url="http://test") as raw: + response = await raw.delete("/history") + assert response.status_code == 204 + + +async def test_history_get_does_not_require_auth(client: AsyncClient) -> None: + """GET /history works without an Authorization header.""" + from httpx import ASGITransport, AsyncClient as RawClient + + from llmock.app import create_app + from llmock.config import get_config + + config = {"models": [], "api-key": "secret"} + app = create_app(config=config) + app.dependency_overrides[get_config] = lambda: config + + transport = ASGITransport(app=app) + async with RawClient(transport=transport, base_url="http://test") as raw: + response = await raw.get("/history") + assert response.status_code == 200 + + +async def test_history_entry_has_timestamp(client: AsyncClient) -> None: + """Each history entry includes a timestamp.""" + await client.post( + "/chat/completions", + json={"model": "gpt-4o", "messages": [{"role": "user", "content": "ts test"}]}, + ) + + response = await client.get("/history") + entry = response.json()["requests"][0] + assert "timestamp" in entry + # Basic ISO 8601 check + assert "T" in entry["timestamp"] diff --git a/tests/test_responses_tool_calls.py b/tests/test_responses_tool_calls.py index 3389610..bf20db7 100644 --- a/tests/test_responses_tool_calls.py +++ b/tests/test_responses_tool_calls.py @@ -321,11 +321,11 @@ async def test_responses_tool_call_picks_configured_tool( async def test_responses_tool_call_does_not_fire_when_last_item_is_function_call_output( client: httpx.AsyncClient, ) -> None: - """In an agentic loop the ToolCallStrategy must NOT re-trigger on cycle 2+. + """In an agentic loop the ToolCallStrategy handles the tool result on cycle 2+. History: user(trigger) → function_call(tool call) → function_call_output(result) - The last item is a FunctionCallOutputItem, so the strategy should return [] - and NOT produce another tool call response. 
+ The last item is a FunctionCallOutputItem, so the strategy should return + a "last tool call result is ..." text response instead of another tool call. """ response = await client.post( "/responses", @@ -348,9 +348,8 @@ async def test_responses_tool_call_does_not_fire_when_last_item_is_function_call assert response.status_code == 200 data = response.json() - # ToolCallStrategy must NOT fire — the trigger was already processed. - # The composition chain falls through → router produces a text message, not a tool call. - assert len(data["output"]) > 0 - assert data["output"][0]["type"] != "function_call", ( - "ToolCallStrategy should not re-fire when the last item is a function_call_output" - ) + assert len(data["output"]) == 1 + output_item = data["output"][0] + assert output_item["type"] == "message" + assert output_item["content"][0]["type"] == "output_text" + assert output_item["content"][0]["text"] == "last tool call result is 4" diff --git a/tests/test_strategies.py b/tests/test_strategies.py index 29b84be..2863a23 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -330,12 +330,12 @@ def test_chat_tool_call_strategy_empty_args_normalised() -> None: def test_chat_tool_call_strategy_last_message_is_tool_role() -> None: - """Strategy must NOT fire when the last non-system message has role 'tool'. + """Strategy returns the tool result as text when last non-system message is 'tool'. In an agentic loop the conversation history looks like: user(trigger) → assistant(tool_call) → tool(result) The trigger phrase still lives in the user message, but the last turn - belongs to the tool. Firing again would create an infinite loop. + belongs to the tool. The strategy should return the tool result as text. 
""" strategy = ChatToolCallStrategy(config={}) request = ChatCompletionRequest( @@ -353,8 +353,9 @@ def test_chat_tool_call_strategy_last_message_is_tool_role() -> None: result = strategy.generate_response(request) - # The last non-system message is 'tool' → strategy must fall through. - assert result == [] + # The last non-system message is 'tool' → strategy returns 'last tool call result is 4'. + assert len(result) == 1 + assert result[0].content == "last tool call result is 4" # ============================================================================