Skip to content

Commit 5873402

Browse files
authored
Conformance burn-down: server-side InputRequiredResult, Mcp-Method/Name validation, x-mcp-header filter (14 scenarios → green) (#2974)
1 parent 603342f commit 5873402

18 files changed

Lines changed: 1163 additions & 150 deletions

File tree

.github/actions/conformance/client.py

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
json-schema-ref-no-deref - Connect, list tools (no $ref deref)
2121
request-metadata - Connect with all callbacks; client stamps _meta
2222
http-standard-headers - Connect, call a tool (Mcp-* headers checked)
23+
http-invalid-tool-headers - List tools, call every surfaced tool (x-mcp-header filter)
2324
elicitation-sep1034-client-defaults - Elicitation with default accept callback
25+
sep-2322-client-request-state - Drive the manual MRTR retry surface
2426
auth/client-credentials-jwt - Client credentials with private_key_jwt
2527
auth/client-credentials-basic - Client credentials with client_secret_basic
2628
auth/* - Authorization code flow (default for auth scenarios)
@@ -296,6 +298,43 @@ async def run_http_standard_headers(server_url: str) -> None:
296298
logger.debug(f"add_numbers result: {result}")
297299

298300

301+
def _stub_required_args(input_schema: dict[str, Any]) -> dict[str, Any]:
302+
"""Minimal arguments satisfying a tool inputSchema's required list."""
303+
by_type: dict[str, Any] = {
304+
"string": "x",
305+
"integer": 0,
306+
"number": 0,
307+
"boolean": False,
308+
"object": {},
309+
"array": [],
310+
"null": None,
311+
}
312+
properties = input_schema.get("properties", {})
313+
return {name: by_type.get(properties.get(name, {}).get("type"), "x") for name in input_schema.get("required", [])}
314+
315+
316+
@register("http-invalid-tool-headers")
async def run_http_invalid_tool_headers(server_url: str) -> None:
    """Call every tool that survives the SDK's x-mcp-header filter (SEP-2243).

    The harness mock lists one well-formed tool alongside several whose
    x-mcp-header annotations are malformed (empty, non-primitive type,
    duplicate, invalid chars). The scenario passes when valid_tool is called
    and the malformed ones are not, so a conforming client drops them from the
    list_tools result and the loop below never sees them.

    The scenario sets allowClientError, which is why a failing call is logged
    and skipped instead of aborting the whole run.
    """
    async with Client(server_url, mode=client_mode()) as client:
        surfaced = (await client.list_tools()).tools
        logger.debug(f"Surfaced tools: {[t.name for t in surfaced]}")
        for tool in surfaced:
            try:
                stub_args = _stub_required_args(tool.input_schema)
                await client.call_tool(tool.name, stub_args)
            except Exception:
                # Best effort by design: keep exercising the remaining tools.
                logger.exception(f"call_tool({tool.name!r}) failed")
336+
337+
299338
@register("elicitation-sep1034-client-defaults")
300339
async def run_elicitation_defaults(server_url: str) -> None:
301340
"""Connect with elicitation callback that applies schema defaults."""
@@ -305,6 +344,53 @@ async def run_elicitation_defaults(server_url: str) -> None:
305344
logger.debug(f"test_client_elicitation_defaults result: {result}")
306345

307346

347+
@register("sep-2322-client-request-state")
async def run_mrtr_client(server_url: str) -> None:
    """Exercise the manual MRTR retry surface against the SEP-2322 client mock.

    The mock speaks the modern lifecycle (server/discover, no initialize) and
    inspects the wire params of every tools/call round, so each round is driven
    explicitly with allow_input_required=True instead of through an auto-loop:

    1. The first call receives an InputRequiredResult.
    2. The fixture fulfils the elicitation locally.
    3. The retry sends input_responses plus the echoed request_state.

    request_state is forwarded straight off the typed result -- a str when the
    server sent one, None when it didn't -- so the serializer's exclude_none
    drops the key in the no-state case without a branch here. The unrelated
    call between the two echo-state rounds proves MRTR params don't leak across
    tools, and the final no-result-type call must parse as a complete
    CallToolResult with no retry.
    """
    async with Client(server_url, mode=client_mode()) as client:
        await client.list_tools()
        accepted = {"confirm": types.ElicitResult(action="accept", content={"confirmed": True})}

        async def retry_with_answers(tool_name: str, pending: types.InputRequiredResult) -> None:
            # Second round: answer the elicitation and echo the server's state.
            await client.call_tool(
                tool_name,
                {},
                input_responses=accepted,
                request_state=pending.request_state,
                allow_input_required=True,
            )

        # Tool that hands out request_state, with an unrelated call in between.
        echo_round = await client.call_tool("test_mrtr_echo_state", {}, allow_input_required=True)
        assert isinstance(echo_round, types.InputRequiredResult)
        await client.call_tool("test_mrtr_unrelated", {})
        await retry_with_answers("test_mrtr_echo_state", echo_round)

        # Tool that asks for input but sends no request_state.
        stateless_round = await client.call_tool("test_mrtr_no_state", {}, allow_input_required=True)
        assert isinstance(stateless_round, types.InputRequiredResult)
        await retry_with_answers("test_mrtr_no_state", stateless_round)

        # A result without a result type must still parse as a finished call.
        plain_result = await client.call_tool("test_mrtr_no_result_type", {})
        assert isinstance(plain_result, types.CallToolResult)
392+
393+
308394
@register("auth/client-credentials-jwt")
309395
async def run_client_credentials_jwt(server_url: str) -> None:
310396
"""Client credentials flow with private_key_jwt authentication."""
@@ -441,8 +527,7 @@ def main() -> None:
441527
asyncio.run(run_auth_code_client(server_url))
442528
else:
443529
# Unhandled scenarios:
444-
# - sep-2322-client-request-state (SEP-2322 / S6: MRTR client loop)
445-
# - http-custom-headers, http-invalid-tool-headers (SEP-2243 / S8: Mcp-Param-* headers)
530+
# - http-custom-headers (SEP-2243 / S8: Mcp-Param-* emission)
446531
print(f"Unknown scenario: {scenario}", file=sys.stderr)
447532
sys.exit(1)
448533
else:

.github/actions/conformance/expected-failures.2026-07-28.yml

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,48 +21,19 @@
2121
# milestone.
2222

2323
client:
24-
# --- Same gaps as the 2025 baseline (fail identically when forced to 2026-07-28) ---
25-
# SEP-2322 (multi-round-trip requests): client does not echo requestState /
26-
# handle IncompleteResult yet.
27-
- sep-2322-client-request-state
28-
# SEP-2243 (HTTP standardization): no fixture handler / client Mcp-Param-* support yet.
24+
# SEP-2243 (HTTP standardization): no client Mcp-Param-* support yet — needs the
25+
# tool-schema-cache vs per-call tool_definition design (S8).
2926
- http-custom-headers
30-
- http-invalid-tool-headers
3127
# auth/enterprise-managed-authorization (SEP-990) is in the 2025 baseline but
3228
# NOT here: the harness skips it as inapplicable at --spec-version 2026-07-28
3329
# (it is an extension scenario not carried into the 2026 wire), so it is
3430
# neither run nor evaluated on this leg.
3531

3632
server:
37-
# --- Carried-forward 2025-era scenarios still failing on the 2026 wire ---
3833
# The stateless 2026 path now reaches handlers for plain request/response
3934
# scenarios; tools-call-with-progress still fails because the stateless
4035
# server has no channel for server→client progress notifications.
4136
- tools-call-with-progress
42-
# SEP-2106 (JSON Schema 2020-12 in tool inputSchema): the fixture tool's
43-
# schema has none of the 2020-12 keywords the scenario checks. The scenario
44-
# is in `--suite all` but not `--suite active`, so this is the only leg that
45-
# runs it; it fails identically at 2025-11-25 (not a 2026-path regression).
46-
- json-schema-2020-12
47-
48-
# --- Draft scenarios (same failures and reasons as the `--suite draft` leg) ---
49-
# SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented.
50-
- input-required-result-basic-elicitation
51-
- input-required-result-basic-sampling
52-
- input-required-result-basic-list-roots
53-
- input-required-result-request-state
54-
- input-required-result-multiple-input-requests
55-
- input-required-result-multi-round
37+
# SEP-2322 (multi-round-trip requests / IncompleteResult): the prompt pipeline
38+
# cannot return InputRequiredResult from MCPServer yet (tools/call can).
5639
- input-required-result-non-tool-request
57-
- input-required-result-result-type
58-
- input-required-result-tampered-state
59-
- input-required-result-capability-check
60-
# SEP-2243 (HTTP header standardization): Mcp-Method / Mcp-Name cross-check
61-
# against the request body is not implemented.
62-
- http-header-validation
63-
# WARNING-only entries: these scenarios emit no FAILURE checks but the
64-
# expected-failures evaluator counts WARNINGs as failures (the summary line
65-
# only shows passed/failed, not warnings, so a local re-probe can mis-read
66-
# these as stale).
67-
- input-required-result-missing-input-response
68-
- input-required-result-validate-input

.github/actions/conformance/expected-failures.yml

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,9 @@
1212

1313
client:
1414
# --- Draft-spec scenarios (in `--suite draft`, also part of `--suite all`) ---
15-
# SEP-2322 (multi-round-trip requests): client does not echo requestState /
16-
# handle IncompleteResult yet.
17-
- sep-2322-client-request-state
18-
# SEP-2243 (HTTP standardization): no fixture handler / client Mcp-Param-* support yet.
15+
# SEP-2243 (HTTP standardization): no client Mcp-Param-* support yet — needs the
16+
# tool-schema-cache vs per-call tool_definition design (S8).
1917
- http-custom-headers
20-
- http-invalid-tool-headers
2118

2219
# --- Pre-existing scenarios that fail on checks added after conformance 0.1.15 ---
2320
# SEP-990 (enterprise-managed authorization extension): no fixture handler /
@@ -26,23 +23,6 @@ client:
2623

2724
server:
2825
# --- Draft-spec scenarios (in `--suite draft`; the `active` suite is green) ---
29-
# SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented.
30-
- input-required-result-basic-elicitation
31-
- input-required-result-basic-sampling
32-
- input-required-result-basic-list-roots
33-
- input-required-result-request-state
34-
- input-required-result-multiple-input-requests
35-
- input-required-result-multi-round
26+
# SEP-2322 (multi-round-trip requests / IncompleteResult): the prompt pipeline
27+
# cannot return InputRequiredResult from MCPServer yet (tools/call can).
3628
- input-required-result-non-tool-request
37-
- input-required-result-result-type
38-
- input-required-result-tampered-state
39-
- input-required-result-capability-check
40-
# SEP-2243 (HTTP header standardization): Mcp-Method / Mcp-Name cross-check
41-
# against the request body is not implemented.
42-
- http-header-validation
43-
# WARNING-only entries: these scenarios emit no FAILURE checks but the
44-
# expected-failures evaluator counts WARNINGs as failures (the summary line
45-
# only shows passed/failed, not warnings, so a local re-probe can mis-read
46-
# these as stale).
47-
- input-required-result-missing-input-response
48-
- input-required-result-validate-input

.github/workflows/conformance.yml

Lines changed: 7 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,10 @@ permissions:
1515

1616
env:
1717
# Pinned conformance harness package spec (passed verbatim to `npx --yes`).
18-
# Use a published version, e.g. @modelcontextprotocol/conformance@0.2.0-alpha.5.
18+
# Use a published version, e.g. @modelcontextprotocol/conformance@0.2.0-alpha.7.
1919
# Bump deliberately and reconcile both
2020
# .github/actions/conformance/expected-failures*.yml files in the same change.
21-
#
22-
# TODO: replace with @modelcontextprotocol/conformance@0.2.0-alpha.5 once
23-
# https://github.com/modelcontextprotocol/conformance/pull/357 publishes, and
24-
# drop CONFORMANCE_PKG_SHA256 plus the fetch-and-verify step below.
25-
CONFORMANCE_PKG: "https://pkg.pr.new/@modelcontextprotocol/conformance@65fcd39"
26-
CONFORMANCE_PKG_SHA256: "9a381d7083f8be2fe7ae44efeca54530f18c61425805ddaf9cd88915efcc1574"
21+
CONFORMANCE_PKG: "@modelcontextprotocol/conformance@0.2.0-alpha.7"
2722

2823
jobs:
2924
server-conformance:
@@ -39,19 +34,6 @@ jobs:
3934
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
4035
with:
4136
node-version: 24
42-
- name: Fetch and verify conformance harness
43-
# Only when CONFORMANCE_PKG is a URL: download, check the recorded
44-
# sha256, and re-point CONFORMANCE_PKG at the verified local tarball.
45-
# When CONFORMANCE_PKG is a registry spec, this step is a no-op (npm's
46-
# own integrity check applies).
47-
run: |
48-
case "$CONFORMANCE_PKG" in
49-
https://*)
50-
curl -fsSL "$CONFORMANCE_PKG" -o /tmp/conformance.tgz
51-
echo "$CONFORMANCE_PKG_SHA256 /tmp/conformance.tgz" | sha256sum -c -
52-
echo "CONFORMANCE_PKG=file:/tmp/conformance.tgz" >> "$GITHUB_ENV"
53-
;;
54-
esac
5537
- run: uv sync --frozen --all-extras --package mcp-everything-server
5638
- name: Run server conformance (active suite)
5739
run: >-
@@ -83,26 +65,22 @@ jobs:
8365
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
8466
with:
8567
node-version: 24
86-
- name: Fetch and verify conformance harness
87-
run: |
88-
case "$CONFORMANCE_PKG" in
89-
https://*)
90-
curl -fsSL "$CONFORMANCE_PKG" -o /tmp/conformance.tgz
91-
echo "$CONFORMANCE_PKG_SHA256 /tmp/conformance.tgz" | sha256sum -c -
92-
echo "CONFORMANCE_PKG=file:/tmp/conformance.tgz" >> "$GITHUB_ENV"
93-
;;
94-
esac
9568
- run: uv sync --frozen --all-extras --package mcp
9669
- name: Run client conformance (all suite)
70+
# The harness runs all scenarios via unbounded Promise.all; with 40
71+
# scenarios on a 2-core runner the slowest one (sse-retry, which has a
72+
# real-time SSE reconnect wait) needs more than the 30s default budget.
9773
run: >-
9874
npx --yes "$CONFORMANCE_PKG" client
9975
--command 'uv run --frozen python .github/actions/conformance/client.py'
10076
--suite all
77+
--timeout 60000
10178
--expected-failures ./.github/actions/conformance/expected-failures.yml
10279
- name: Run client conformance (2026-07-28 wire, all suite)
10380
run: >-
10481
npx --yes "$CONFORMANCE_PKG" client
10582
--command 'uv run --frozen python .github/actions/conformance/client.py'
10683
--suite all
84+
--timeout 60000
10785
--spec-version 2026-07-28
10886
--expected-failures ./.github/actions/conformance/expected-failures.2026-07-28.yml

docs/migration.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ Version 2 of the MCP Python SDK introduces several breaking changes to improve t
1010

1111
### `MCPServer.call_tool()` returns `CallToolResult`
1212

13-
`MCPServer.call_tool()` now always returns a `CallToolResult`. It previously
13+
`MCPServer.call_tool()` now returns a `CallToolResult` (or an
14+
`InputRequiredResult` when a multi-round tool requests further input). It previously
1415
advertised `Sequence[ContentBlock] | dict[str, Any]` and leaked the internal
1516
conversion shapes (a bare content sequence or a `(content, structured_content)`
1617
tuple), forcing callers to re-assemble a `CallToolResult` themselves.

0 commit comments

Comments
 (0)