Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [UNRELEASED]

### Added

- Debug flag for detailed request output

### Changed

- When the last message is a tool call result, that result (prefixed with "last tool call result is") is returned instead of the last user message

## [0.0.1]

### Added
Expand Down
3 changes: 3 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# Port for the HTTP server (default: 8000)
port: 8000

# Debug mode - when true, pretty-prints all incoming request bodies to stdout
debug: false

# API key for authentication (optional - if not set, no auth required)
api-key:

Expand Down
3 changes: 3 additions & 0 deletions docs/llmock-skill/references/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# Port for the HTTP server (default: 8000)
port: 8000

# Debug mode - when true, pretty-prints all incoming request bodies to stdout
debug: false

# API key for authentication (optional - if not set, no auth required)
api-key:

Expand Down
26 changes: 26 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# ModAI Backend

default:
@just --list

# Install dependencies
install:
uv sync

# Start the development server
start:
uv run uvicorn modai.main:app

# Run tests
test:
uv run pytest

# Check code style and linting
check:
uv run ruff format --check src
uv run ruff check src

# Fix code style and linting issues
check-write:
uv run ruff format src
uv run ruff check --fix src
30 changes: 30 additions & 0 deletions src/llmock/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""FastAPI application factory and setup."""

import json
import pprint

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
Expand Down Expand Up @@ -51,6 +54,29 @@ async def dispatch(self, request: Request, call_next):
return await call_next(request)


class DebugLoggingMiddleware(BaseHTTPMiddleware):
"""Middleware to pretty-print incoming request bodies when debug mode is enabled."""

def __init__(self, app, config: Config):
"""Initialize middleware with config."""
super().__init__(app)
self.config = config

async def dispatch(self, request: Request, call_next):
"""Log request body if debug mode is enabled."""
if self.config.get("debug"):
body = await request.body()
if body:
try:
parsed = json.loads(body)
print(f"\n[DEBUG] {request.method} {request.url.path}")
pprint.pprint(parsed)
except json.JSONDecodeError, ValueError:
print(f"\n[DEBUG] {request.method} {request.url.path} (raw)")
print(body.decode(errors="replace"))
return await call_next(request)


def create_app(config: Config = get_config()) -> FastAPI:
"""Create and configure the FastAPI application."""
app = FastAPI(title="llmock")
Expand All @@ -71,6 +97,10 @@ def create_app(config: Config = get_config()) -> FastAPI:
# Add API key middleware
app.add_middleware(APIKeyMiddleware, config=config)

# Add debug logging middleware (outermost, runs before auth)
if config.get("debug"):
app.add_middleware(DebugLoggingMiddleware, config=config)

# Include routers
app.include_router(health.router)
app.include_router(models.router)
Expand Down
42 changes: 33 additions & 9 deletions src/llmock/strategies/strategy_content_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,48 @@ class ChatMirrorStrategy:
def __init__(self, config: dict[str, Any]) -> None:
pass

# Roles that can produce a mirror response, in priority order.
# The reversed message list is scanned and the first message whose role
# appears here determines the response.
_MIRROR_ROLES = ("tool", "user")

def generate_response(
self, request: ChatCompletionRequest
) -> list[StrategyResponse]:
"""Return the content of the last user message.
"""Return a response based on the most recent message with a mirror role.

Scans the message list in reverse and returns a response for the first
message whose role is in ``_MIRROR_ROLES``:

- ``"tool"`` → ``"last tool call result is <content>"``
- ``"user"`` → echoes the message content

Args:
request: The chat completion request containing messages.

Returns:
A single-item list with a text StrategyResponse containing
the last user message, or a default message if none found.
A single-item list with a text StrategyResponse, or a default
message when no qualifying message is found.
"""
for message in reversed(request.messages):
if message.role == "user" and message.content:
text = extract_text_content(message.content)
if text:
return [text_response(text)]
return [text_response("No user message provided.")]
last = next(
(
msg
for msg in reversed(request.messages)
if msg.role in self._MIRROR_ROLES and extract_text_content(msg.content)
),
None,
)
if last is None:
return [text_response("No user message provided.")]
content = extract_text_content(last.content) or ""
if last.role == "tool":
return [text_response(f"last tool call result is {content}")]
# role == "user"
return (
[text_response(content)]
if content
else [text_response("No user message provided.")]
)


class ResponseMirrorStrategy:
Expand Down
91 changes: 91 additions & 0 deletions tests/test_chat_tool_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,3 +528,94 @@ async def test_streaming_without_include_usage(raw_client: httpx.AsyncClient) ->
# No usage chunk should be present
usage_chunks = [c for c in chunks if not c["choices"] and c.get("usage")]
assert len(usage_chunks) == 0


# ============================================================================
# Full agentic loop: user trigger → assistant tool call → tool result
# ============================================================================


async def test_full_agentic_loop_mirrors_user_message_after_tool_result() -> None:
"""Full OpenAI function-calling loop: assert llmock's response on the second turn.

Simulates the conversation history a real client sends after executing a tool:

1. user — original request containing a trigger phrase
("call tool 'calculate' with '...'")
2. assistant — tool call that llmock returned on the first turn
(role=assistant, content=None, tool_calls=[...])
3. tool — the result produced by the caller's tool executor
(role=tool, tool_call_id=..., content="4")

All three messages are replayed to llmock in a single second-turn request.

With the default composition [ErrorStrategy, ToolCallStrategy, MirrorStrategy]:
- ErrorStrategy: no "raise error" phrase → returns []
- ToolCallStrategy: last non-system message is "tool", not "user" → returns []
- MirrorStrategy: echoes the last *user* message as a plain text response

Expected: a single assistant text choice whose content equals the original
user message. No tool_calls in the second-turn response.
"""
full_composition_config: Config = {
"models": [{"id": "gpt-4", "created": 1700000000, "owned_by": "openai"}],
"api-key": TEST_API_KEY,
"strategies": ["ErrorStrategy", "ToolCallStrategy", "MirrorStrategy"],
}
app = create_app(config=full_composition_config)
app.dependency_overrides[get_config] = lambda: full_composition_config

transport = httpx.ASGITransport(app=app)
async with httpx.AsyncClient(
transport=transport,
base_url="http://testserver",
headers={"Authorization": f"Bearer {TEST_API_KEY}"},
) as client:
user_message = "call tool 'calculate' with '{\"expression\": \"2+2\"}'"

response = await client.post(
"/chat/completions",
json={
"model": "gpt-4",
"messages": [
# Turn 1 – user sent the original request with a trigger phrase
{"role": "user", "content": user_message},
# Turn 1 – llmock replied with a tool call (assistant message)
{
"role": "assistant",
"content": None,
"tool_calls": [
{
"id": "call_abc123",
"type": "function",
"function": {
"name": "calculate",
"arguments": '{"expression": "2+2"}',
},
}
],
},
# Turn 2 – tool executor returned the result
{
"role": "tool",
"content": "4",
"tool_call_id": "call_abc123",
},
],
"tools": [CALCULATOR_TOOL],
"stream": False,
},
)

assert response.status_code == 200
data = response.json()

# ToolCallStrategy does not re-trigger: last non-system message is "tool"
# MirrorStrategy kicks in and returns the tool result with a prefix
assert len(data["choices"]) == 1
choice = data["choices"][0]
assert choice["finish_reason"] == "stop"
assert choice["message"]["role"] == "assistant"
assert choice["message"]["content"] == "last tool call result is 4"
# No tool calls in the second-turn response
assert choice["message"].get("tool_calls") is None