|
14 | 14 | import base64 |
15 | 15 | import binascii |
16 | 16 | import re |
17 | | -from collections.abc import Mapping, Sequence |
| 17 | +from collections.abc import Iterator, Mapping, Sequence |
18 | 18 | from dataclasses import dataclass |
19 | 19 | from types import MappingProxyType |
20 | 20 | from typing import Any, Final, cast |
|
_HEADER_SAFE = re.compile(r"^[\x20-\x7E]*$")
# RFC 9110 §5.6.2 token: the only characters permitted in an HTTP field name.
_RFC9110_TOKEN = re.compile(r"^[!#$%&'*+\-.^_`|~0-9A-Za-z]+$")
# JSON-Schema types the spec permits to carry `x-mcp-header` (transports.mdx
# §Custom Headers). `number` is explicitly forbidden — float→str is not
# portable across implementations.
_X_MCP_HEADER_PRIMITIVE_TYPES: Final = frozenset({"string", "integer", "boolean"})

# JSON Schema 2020-12 applicator keywords whose values are themselves schema
# positions, grouped by value shape: a single subschema, a list of subschemas,
# or a mapping whose values are subschemas. `properties` is handled separately
# as the only keyword that preserves the statically-reachable chain; every
# keyword here drops the chain to None. Instance-data keywords (`default`,
# `examples`, `const`, `enum`) and `$ref`/`$dynamicRef` are deliberately absent
# so the walk never mistakes data for an annotation and never dereferences.
_SUBSCHEMA_SINGLE: Final = frozenset(
    {
        "items",
        "contains",
        "unevaluatedItems",
        "additionalProperties",
        "propertyNames",
        "unevaluatedProperties",
        "not",
        "if",
        "then",
        "else",
        "contentSchema",
    }
)
_SUBSCHEMA_LIST: Final = frozenset({"allOf", "anyOf", "oneOf", "prefixItems"})
_SUBSCHEMA_MAP: Final = frozenset({"patternProperties", "dependentSchemas", "$defs", "definitions"})
| 112 | + |
| 113 | + |
def _walk_schema_positions(root: Any) -> Iterator[tuple[tuple[str, ...] | None, dict[str, Any]]]:
    """Yield `(properties_path, schema)` for every schema position in `root`.

    `properties_path` is the chain of `properties` keys from the root to the
    position, or `None` once any other applicator keyword has been crossed.
    The root itself yields `()`. Only the applicator keywords named in
    `_SUBSCHEMA_SINGLE`, `_SUBSCHEMA_LIST` and `_SUBSCHEMA_MAP` (plus
    `properties`) are entered; instance-data keywords are not, and `$ref` is
    not dereferenced, so the walk terminates on any finite JSON value. An
    explicit stack keeps the function total even on pathologically deep input.

    Non-dict nodes (including a non-dict `root`) are skipped silently, so a
    `None` or scalar root yields nothing. Positions are visited in LIFO order:
    the most recently pushed child is yielded first, so siblings are not
    reported in document order. No cycle detection is performed; a
    self-referential Python dict (which JSON decoding cannot produce) would
    never terminate.
    """
    stack: list[tuple[tuple[str, ...] | None, Any]] = [((), root)]
    while stack:
        path, node = stack.pop()
        if not isinstance(node, dict):
            # Booleans (`true`/`false` schemas), lists and scalars carry no keywords.
            continue
        schema = cast(dict[str, Any], node)
        yield path, schema
        for kw, val in schema.items():
            if kw == "properties" and isinstance(val, dict):
                # The only keyword that extends the path; once the path has
                # been dropped to None it stays None for all descendants.
                stack.extend(
                    ((*path, name) if path is not None else None, sub)
                    for name, sub in cast(dict[str, Any], val).items()
                )
            elif kw in _SUBSCHEMA_SINGLE:
                # Value shape is checked when the node is popped, not here.
                stack.append((None, val))
            elif kw in _SUBSCHEMA_LIST and isinstance(val, list):
                stack.extend((None, sub) for sub in cast(list[Any], val))
            elif kw in _SUBSCHEMA_MAP and isinstance(val, dict):
                stack.extend((None, sub) for sub in cast(dict[str, Any], val).values())
88 | 141 |
|
89 | 142 |
|
90 | 143 | def encode_header_value(value: str) -> str: |
@@ -123,31 +176,33 @@ def decode_header_value(value: str | None) -> str | None: |
def find_invalid_x_mcp_header(input_schema: Any) -> str | None:
    """Return a reason string if any `x-mcp-header` annotation in `input_schema` is invalid; else `None`.

    Walks every JSON Schema 2020-12 schema position. An annotation is valid
    only when it sits on a property statically reachable from the root via a
    chain of pure `properties` keys, names a non-empty RFC 9110 token, is on
    an integer/string/boolean property, and is case-insensitively unique
    across the whole schema. A `None` / non-mapping schema has no schema
    positions and returns `None`.

    Only the first problem encountered is reported; the order in which
    positions are examined is whatever `_walk_schema_positions` yields.
    Nested properties are identified in messages by their `properties` keys
    joined with `.`.
    """
    # Lower-cased header name -> dotted property path that first claimed it.
    seen: dict[str, str] = {}
    for path, schema in _walk_schema_positions(input_schema):
        if X_MCP_HEADER_KEY not in schema:
            continue
        if not path:  # None (off the pure-properties chain) or () (the root itself)
            return f"{X_MCP_HEADER_KEY} found at a schema position not reachable via a pure `properties` chain"
        where = ".".join(path)
        header = schema[X_MCP_HEADER_KEY]
        if not isinstance(header, str) or not _RFC9110_TOKEN.fullmatch(header):
            return f"property {where!r}: {X_MCP_HEADER_KEY} {header!r} is not an RFC 9110 token"
        prop_type = schema.get("type")
        # A list-valued `type` (e.g. ["string", "null"]) is rejected: only a
        # single string naming one of the permitted types is accepted.
        if not isinstance(prop_type, str) or prop_type not in _X_MCP_HEADER_PRIMITIVE_TYPES:
            return (
                f"property {where!r}: {X_MCP_HEADER_KEY} is only permitted on "
                f"integer/string/boolean properties (got {prop_type!r})"
            )
        lower = header.lower()
        if lower in seen:
            return f"{X_MCP_HEADER_KEY} {header!r} on property {where!r} duplicates property {seen[lower]!r}"
        seen[lower] = where
    return None
152 | 207 |
|
153 | 208 |
|
|
0 commit comments