|
15 | 15 | Matching is not specified by RFC 6570. A few templates can expand to |
16 | 16 | URIs that ``match()`` cannot unambiguously reverse: |
17 | 17 |
|
18 | | -* Multi-variable reserved expressions like ``{+x,y}`` use a comma as |
19 | | - separator but also permit commas *inside* values (commas are in the |
20 | | - reserved set). ``match("a,b,c")`` cannot know which comma is the |
21 | | - separator. The matcher takes the last comma as the split point; if |
22 | | - your values contain commas, prefer separate expressions (``{+x}/{+y}``) |
23 | | - or a different operator. |
| 18 | +* Reserved/fragment expressions (``{+var}``, ``{#var}``) are restricted |
| 19 | + to positions that avoid quadratic-time backtracking: at most one per |
| 20 | + template, and not immediately adjacent to another expression. The |
| 21 | + ``[^?#]*`` pattern used for them overlaps with every other operator's character |
| 22 | + class, so a failing match against ``{+a}{b}`` or ``{+a}/x/{+b}`` |
| 23 | + backtracks in O(n²) time. Use a literal separator before a bounded |
| 24 | + expression (``{+a}/sep/{b}``) or put the reserved expression last |
| 25 | + (``file://docs/{+path}``). Trailing ``{?...}``/``{&...}`` query |
| 26 | + expressions are always fine since they're matched via ``parse_qs``. |
24 | 27 |
|
25 | 28 | * Reserved expansion ``{+var}`` leaves ``?`` and ``#`` unencoded, but |
26 | 29 | the match pattern stops at those characters so that templates like |
@@ -615,7 +618,7 @@ def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Va |
615 | 618 | Raises: |
616 | 619 | InvalidUriTemplate: On unclosed braces, too many expressions, or |
617 | 620 | any error surfaced by :func:`_parse_expression` or |
618 | | - :func:`_check_adjacent_explodes`. |
| 621 | + :func:`_check_ambiguous_adjacency`. |
619 | 622 | """ |
620 | 623 | parts: list[_Part] = [] |
621 | 624 | variables: list[Variable] = [] |
@@ -659,7 +662,7 @@ def _parse(template: str, *, max_expressions: int) -> tuple[list[_Part], list[Va |
659 | 662 | # Advance past the closing brace. |
660 | 663 | i = end + 1 |
661 | 664 |
|
662 | | - _check_adjacent_explodes(template, parts) |
| 665 | + _check_ambiguous_adjacency(template, parts) |
663 | 666 | _check_duplicate_variables(template, variables) |
664 | 667 | return parts, variables |
665 | 668 |
|
@@ -752,36 +755,73 @@ def _check_duplicate_variables(template: str, variables: list[Variable]) -> None |
752 | 755 | seen.add(var.name) |
753 | 756 |
|
754 | 757 |
|
755 | | -def _check_adjacent_explodes(template: str, parts: list[_Part]) -> None: |
756 | | - """Reject templates with adjacent explode variables. |
| 758 | +def _check_ambiguous_adjacency(template: str, parts: list[_Part]) -> None: |
| 759 | + """Reject templates where adjacent expressions would cause ambiguous or quadratic matching. |
757 | 760 |
|
758 | | - Patterns like ``{/a*}{/b*}`` are ambiguous for matching: given |
759 | | - ``/x/y/z``, the split between ``a`` and ``b`` is undetermined. |
760 | | - Different operators (``{/a*}{.b*}``) do not help in general because |
761 | | - the first operator's character class often includes the second's |
762 | | - separator, so the first explode greedily consumes both. We reject |
763 | | - all adjacent explodes at parse time rather than picking an arbitrary |
764 | | - resolution. A literal between them (``{/a*}/x{/b*}``) still |
765 | | - disambiguates. |
| 761 | + Two patterns are rejected: |
| 762 | +
|
| 763 | + 1. Adjacent explode variables (``{/a*}{/b*}``): the split between |
| 764 | + ``a`` and ``b`` in ``/x/y/z`` is undetermined. Different |
| 765 | + operators don't help since character classes overlap. |
| 766 | +
|
| 767 | + 2. Reserved/fragment expansion in a position that causes quadratic |
| 768 | + backtracking. The ``[^?#]*`` pattern for ``+`` and ``#`` |
| 769 | + overlaps with every other operator's character class, so when a |
| 770 | + trailing match fails the engine backtracks through O(n) split |
| 771 | + points. Two conditions trigger this: |
| 772 | +
|
| 773 | + - ``{+var}``/``{#var}`` immediately followed by another expression |
| 774 | + (``{+a}{b}``, ``{+a}{/b*}``) |
| 775 | + - Two ``{+var}``/``{#var}`` anywhere in the path, even with a |
| 776 | + literal between them (``{+a}/x/{+b}``) — the literal does not |
| 777 | + disambiguate since ``[^?#]*`` matches it too |
| 778 | +
|
| 779 | + A 64KB payload against either can consume tens of seconds of CPU. |
| 780 | +
|
| 781 | + Trailing ``{?...}``/``{&...}`` expressions are handled via |
| 782 | + ``parse_qs`` outside the path regex, so they do not count against |
| 783 | + any check. |
766 | 784 |
|
767 | 785 | Raises: |
768 | | - InvalidUriTemplate: If two explode variables appear with no |
769 | | - literal or non-explode variable between them. |
| 786 | + InvalidUriTemplate: If adjacent explodes or a misplaced reserved/fragment expression is detected. |
770 | 787 | """ |
771 | 788 | prev_explode = False |
| 789 | + prev_reserved = False |
| 790 | + seen_reserved = False |
772 | 791 | for part in parts: |
773 | 792 | if isinstance(part, str): |
774 | | - # Literal text breaks any adjacency. |
| 793 | + # A literal breaks immediate adjacency but does not reset |
| 794 | + # the seen-reserved flag: [^?#]* matches most literals. |
775 | 795 | prev_explode = False |
| 796 | + prev_reserved = False |
776 | 797 | continue |
777 | 798 | for var in part.variables: |
778 | | - if var.explode: |
779 | | - if prev_explode: |
780 | | - raise InvalidUriTemplate( |
781 | | - "Adjacent explode expressions are ambiguous for matching and not supported", |
782 | | - template=template, |
783 | | - ) |
784 | | - prev_explode = True |
785 | | - else: |
786 | | - # A non-explode variable also breaks adjacency. |
| 799 | + # ?/& are stripped before pattern building and never reach |
| 800 | + # the path regex. |
| 801 | + if var.operator in ("?", "&"): |
787 | 802 | prev_explode = False |
| 803 | + prev_reserved = False |
| 804 | + continue |
| 805 | + |
| 806 | + if prev_reserved: |
| 807 | + raise InvalidUriTemplate( |
| 808 | + "{+var} or {#var} immediately followed by another expression " |
| 809 | + "causes quadratic-time matching; separate them with a literal", |
| 810 | + template=template, |
| 811 | + ) |
| 812 | + if var.operator in ("+", "#") and seen_reserved: |
| 813 | + raise InvalidUriTemplate( |
| 814 | + "Multiple {+var} or {#var} expressions in one template cause " |
| 815 | + "quadratic-time matching even with literals between them", |
| 816 | + template=template, |
| 817 | + ) |
| 818 | + if var.explode and prev_explode: |
| 819 | + raise InvalidUriTemplate( |
| 820 | + "Adjacent explode expressions are ambiguous for matching and not supported", |
| 821 | + template=template, |
| 822 | + ) |
| 823 | + |
| 824 | + prev_explode = var.explode |
| 825 | + prev_reserved = var.operator in ("+", "#") |
| 826 | + if prev_reserved: |
| 827 | + seen_reserved = True |
0 commit comments