Skip to content

Commit

Permalink
URLPattern: Implement compareComponent() method.
Browse files Browse the repository at this point in the history
This CL adds a prototype URLPattern.compareComponent() to provide a
natural ordering to URLPattern pattern strings.  This was based on
feedback from routing framework authors and there is some discussion
in:

whatwg/urlpattern#61

The general algorithm is to compare the component patterns Part by Part.
The PartType, Modifier, and text contents are compared for each Part,
but group names are not considered.  The end result is a mostly
lexicographical ordering based on fixed text.  Matching groups and
modifiers are ordered such that more restrictive patterns are greater.

Bug: 1232795
Change-Id: I8474cd7d7689e657c9c74c552ad630cdcdd86c95
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3052630
Commit-Queue: Ben Kelly <[email protected]>
Reviewed-by: Jeremy Roman <[email protected]>
Cr-Commit-Position: refs/heads/master@{#906025}
NOKEYCHECK=True
GitOrigin-RevId: 22c632e87d6a2113f28ecff375e2d5b6f69c9710
  • Loading branch information
wanderview authored and copybara-github committed Jul 28, 2021
1 parent 5a2e841 commit 367e336
Show file tree
Hide file tree
Showing 17 changed files with 341 additions and 27 deletions.
1 change: 1 addition & 0 deletions blink/public/mojom/web_feature/web_feature.mojom
Original file line number Diff line number Diff line change
Expand Up @@ -3278,6 +3278,7 @@ enum WebFeature {
kBlobStoreAccessAcrossAgentClustersInResolveAsURLLoaderFactory = 3963,
kBlobStoreAccessAcrossAgentClustersInResolveForNavigation = 3964,
kTapDelayEnabled = 3965,
kV8URLPattern_CompareComponent_Method = 3966,

// Add new features immediately above this line. Don't change assigned
// numbers of any item, and don't reuse removed slots.
Expand Down
2 changes: 2 additions & 0 deletions blink/renderer/bindings/generated_in_modules.gni
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,8 @@ generated_dictionary_sources_in_modules = [
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_text_decoder_options.h",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_text_encoder_encode_into_result.cc",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_text_encoder_encode_into_result.h",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component.cc",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component.h",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component_result.cc",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component_result.h",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_init.cc",
Expand Down
30 changes: 30 additions & 0 deletions blink/renderer/modules/url_pattern/url_pattern.cc
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,36 @@ String URLPattern::hash() const {
return hash_->GeneratePatternString();
}

// static
int URLPattern::compareComponent(const V8URLPatternComponent& component,
const URLPattern* left,
const URLPattern* right) {
switch (component.AsEnum()) {
case V8URLPatternComponent::Enum::kProtocol:
return url_pattern::Component::Compare(*left->protocol_,
*right->protocol_);
case V8URLPatternComponent::Enum::kUsername:
return url_pattern::Component::Compare(*left->username_,
*right->username_);
case V8URLPatternComponent::Enum::kPassword:
return url_pattern::Component::Compare(*left->password_,
*right->password_);
case V8URLPatternComponent::Enum::kHostname:
return url_pattern::Component::Compare(*left->hostname_,
*right->hostname_);
case V8URLPatternComponent::Enum::kPort:
return url_pattern::Component::Compare(*left->port_, *right->port_);
case V8URLPatternComponent::Enum::kPathname:
return url_pattern::Component::Compare(*left->pathname_,
*right->pathname_);
case V8URLPatternComponent::Enum::kSearch:
return url_pattern::Component::Compare(*left->search_, *right->search_);
case V8URLPatternComponent::Enum::kHash:
return url_pattern::Component::Compare(*left->hash_, *right->hash_);
}
NOTREACHED();
}

void URLPattern::Trace(Visitor* visitor) const {
visitor->Trace(protocol_);
visitor->Trace(username_);
Expand Down
6 changes: 5 additions & 1 deletion blink/renderer/modules/url_pattern/url_pattern.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

Expand All @@ -7,6 +6,7 @@

#include "base/types/pass_key.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_typedefs.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component.h"
#include "third_party/blink/renderer/modules/modules_export.h"
#include "third_party/blink/renderer/platform/bindings/script_wrappable.h"
#include "third_party/liburlpattern/parse.h"
Expand Down Expand Up @@ -69,6 +69,10 @@ class MODULES_EXPORT URLPattern : public ScriptWrappable {
String search() const;
String hash() const;

static int compareComponent(const V8URLPatternComponent& component,
const URLPattern* left,
const URLPattern* right);

void Trace(Visitor* visitor) const override;

private:
Expand Down
7 changes: 7 additions & 0 deletions blink/renderer/modules/url_pattern/url_pattern.idl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

typedef (USVString or URLPatternInit) URLPatternInput;

// Names the individual URL component whose patterns are compared by
// URLPattern.compareComponent().
enum URLPatternComponent { "protocol", "username", "password", "hostname",
"port", "pathname", "search", "hash" };

// https://wicg.github.io/urlpattern/
[
Exposed=(Window,Worker),
Expand All @@ -26,4 +29,8 @@ typedef (USVString or URLPatternInit) URLPatternInput;
readonly attribute USVString pathname;
readonly attribute USVString search;
readonly attribute USVString hash;

[Measure]
static short compareComponent(URLPatternComponent component,
URLPattern left, URLPattern right);
};
115 changes: 112 additions & 3 deletions blink/renderer/modules/url_pattern/url_pattern_component.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,12 @@ liburlpattern::EncodeCallback GetEncodeCallback(Component::Type type,
// Utility method to get the correct liburlpattern parse options for a given
// type.
const liburlpattern::Options& GetOptions(Component::Type type) {
using liburlpattern::Options;

// The liburlpattern::Options to use for most component patterns. We
// default to strict mode and case sensitivity. In addition, most
// components have no concept of a delimiter or prefix character.
DEFINE_THREAD_SAFE_STATIC_LOCAL(liburlpattern::Options, default_options,
DEFINE_THREAD_SAFE_STATIC_LOCAL(Options, default_options,
({.delimiter_list = "",
.prefix_list = "",
.sensitive = true,
Expand All @@ -106,7 +108,7 @@ const liburlpattern::Options& GetOptions(Component::Type type) {
// by default. Note, hostnames are case insensitive but we require case
// sensitivity here. This assumes that the hostname values have already
// been normalized to lower case as in URL().
DEFINE_THREAD_SAFE_STATIC_LOCAL(liburlpattern::Options, hostname_options,
DEFINE_THREAD_SAFE_STATIC_LOCAL(Options, hostname_options,
({.delimiter_list = ".",
.prefix_list = "",
.sensitive = true,
Expand All @@ -116,7 +118,7 @@ const liburlpattern::Options& GetOptions(Component::Type type) {
// "/" delimiter controlling how far a named group like ":bar" will match
// by default. It also configures "/" to be treated as an automatic
// prefix before groups.
DEFINE_THREAD_SAFE_STATIC_LOCAL(liburlpattern::Options, pathname_options,
DEFINE_THREAD_SAFE_STATIC_LOCAL(Options, pathname_options,
({.delimiter_list = "/",
.prefix_list = "/",
.sensitive = true,
Expand All @@ -138,6 +140,88 @@ const liburlpattern::Options& GetOptions(Component::Type type) {
NOTREACHED();
}

// Utility function to return a statically allocated Part list.
const std::vector<liburlpattern::Part>& GetWildcardOnlyPartList() {
using liburlpattern::Modifier;
using liburlpattern::Part;
using liburlpattern::PartType;
DEFINE_THREAD_SAFE_STATIC_LOCAL(
std::vector<Part>, instance,
({Part(PartType::kFullWildcard,
/*name=*/"",
/*prefix=*/"", /*value=*/"", /*suffix=*/"", Modifier::kNone)}));
return instance;
}

// Three-way comparison of two Parts.  Returns -1 if `lh` orders before `rh`,
// 0 if they are equivalent, and 1 if `lh` orders after `rh`.
//
// The sort key is, in priority order:
//
//   1. PartType, so that fixed text is favored:
//
//        kFixed > kRegex > kSegmentWildcard > kFullWildcard
//
//      kRegex ranks above the wildcards because it is likely to be used to
//      impose some constraint rather than to duplicate wildcard behavior.
//
//   2. Modifier:
//
//        kNone > kOneOrMore > kOptional > kZeroOrMore
//
//      Requiring the match group to exist is more restrictive than making it
//      optional, and requiring an exact count is more restrictive than
//      repeating.
//
//   3. The text fields `prefix`, `value`, and `suffix`, compared
//      lexicographically in that order.  It's ok to rely on simple byte-wise
//      string comparison because the values have all been URL encoded, which
//      guarantees the strings contain only ASCII.
//
// Steps 1 and 2 depend on the PartType and Modifier enums in liburlpattern
// having the correct corresponding numeric values.  The Part name is not part
// of the key.
int ComparePart(const liburlpattern::Part& lh, const liburlpattern::Part& rh) {
  const auto sort_key = [](const liburlpattern::Part& part) {
    return std::tie(part.type, part.modifier, part.prefix, part.value,
                    part.suffix);
  };

  const auto lh_key = sort_key(lh);
  const auto rh_key = sort_key(rh);

  if (lh_key == rh_key)
    return 0;
  return lh_key < rh_key ? -1 : 1;
}

// Three-way comparison of two Part lists.  The lists are walked in lockstep
// and the first pair of Parts that ComparePart() reports as different decides
// the result.
int ComparePartList(const std::vector<liburlpattern::Part>& lh,
                    const std::vector<liburlpattern::Part>& rh) {
  auto lh_it = lh.begin();
  auto rh_it = rh.begin();

  // Compare the Parts the two lists have in common.
  for (; lh_it != lh.end() && rh_it != rh.end(); ++lh_it, ++rh_it) {
    const int result = ComparePart(*lh_it, *rh_it);
    if (result != 0)
      return result;
  }

  const bool lh_exhausted = lh_it == lh.end();
  const bool rh_exhausted = rh_it == rh.end();

  // Same length and no differing Part: the lists are equal.
  if (lh_exhausted && rh_exhausted)
    return 0;

  // One list is longer than the other.  Compare the next Part of the longer
  // list against a synthetically created empty kFixed Part.  This is what
  // makes "/foo/" more restrictive, and therefore greater, than "/foo/*".
  const liburlpattern::Part empty_fixed(liburlpattern::PartType::kFixed, "",
                                        liburlpattern::Modifier::kNone);
  return lh_exhausted ? ComparePart(empty_fixed, *rh_it)
                      : ComparePart(*lh_it, empty_fixed);
}

} // anonymous namespace

// static
Expand Down Expand Up @@ -218,6 +302,31 @@ Component* Component::Compile(const String& pattern,
std::move(wtf_name_list), base::PassKey<Component>());
}

// static
int Component::Compare(const Component& lh, const Component& rh) {
using liburlpattern::Modifier;
using liburlpattern::Part;
using liburlpattern::PartType;

// If both the left and right components are empty wildcards, then they are
// effectively equal.
if (!lh.pattern_.has_value() && !rh.pattern_.has_value())
return 0;

// If one side has a real pattern and the other side is an empty component,
// then we have to compare to a part list with a single full wildcard.
if (lh.pattern_.has_value() && !rh.pattern_.has_value()) {
return ComparePartList(lh.pattern_->PartList(), GetWildcardOnlyPartList());
}

if (!lh.pattern_.has_value() && rh.pattern_.has_value()) {
return ComparePartList(GetWildcardOnlyPartList(), rh.pattern_->PartList());
}

// Otherwise compare the part lists of the patterns on each side.
return ComparePartList(lh.pattern_->PartList(), rh.pattern_->PartList());
}

Component::Component(Type type,
liburlpattern::Pattern pattern,
ScriptRegexp* regexp,
Expand Down
7 changes: 7 additions & 0 deletions blink/renderer/modules/url_pattern/url_pattern_component.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ class Component final : public GarbageCollected<Component> {
Component* protocol_component,
ExceptionState& exception_state);

// Compare the pattern strings in the two given components. This provides a
// mostly lexicographical ordering based on fixed text in the patterns.
// Matching groups and modifiers are treated such that more restrictive
// patterns are greater in value. Group names are not considered in the
// comparison.
static int Compare(const Component& lh, const Component& rh);

// Constructs a Component with a real `pattern` that compiled to the given
// `regexp`.
Component(Type type,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
[
{
"component": "pathname",
"left": { "pathname": "/foo/a" },
"right": { "pathname": "/foo/b" },
"expected": -1
},
{
"component": "pathname",
"left": { "pathname": "/foo/b" },
"right": { "pathname": "/foo/bar" },
"expected": -1
},
{
"component": "pathname",
"left": { "pathname": "/foo/bar" },
"right": { "pathname": "/foo/:bar" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/" },
"right": { "pathname": "/foo/:bar" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/:bar" },
"right": { "pathname": "/foo/*" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}" },
"right": { "pathname": "/foo/(bar)" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}" },
"right": { "pathname": "/foo/{bar}+" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}+" },
"right": { "pathname": "/foo/{bar}?" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}?" },
"right": { "pathname": "/foo/{bar}*" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/(123)" },
"right": { "pathname": "/foo/(12)" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/:b" },
"right": { "pathname": "/foo/:a" },
"expected": 0
},
{
"component": "pathname",
"left": { "pathname": "*/foo" },
"right": { "pathname": "*" },
"expected": 1
},
{
"component": "port",
"left": { "port": "9" },
"right": { "port": "100" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "foo/:bar?/baz" },
"right": { "pathname": "foo/{:bar}?/baz" },
"expected": -1
},
{
"component": "pathname",
"left": { "pathname": "foo/:bar?/baz" },
"right": { "pathname": "foo{/:bar}?/baz" },
"expected": 0
},
{
"component": "pathname",
"left": { "pathname": "foo/:bar?/baz" },
"right": { "pathname": "fo{o/:bar}?/baz" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "foo/:bar?/baz" },
"right": { "pathname": "foo{/:bar/}?baz" },
"expected": -1
},
{
"component": "pathname",
"left": "https://a.example.com/b?a",
"right": "https://b.example.com/a?b",
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}/baz" },
"right": { "pathname": "/foo/bar/baz" },
"expected": 0
}
]
Loading

0 comments on commit 367e336

Please sign in to comment.