From c44615f3e29ca29a095e7c0a3d49c5a0ba768a41 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Sat, 19 Aug 2023 16:59:15 +0200 Subject: [PATCH] Introduce the Encoder class * Introduce the Encoder class to normalize encoding/decoding in all packages * Introduce the KeyValuePairConverter class to normalize key/value parsing and building * Rewrite QueryString parser/builder class * Add new methods to QueryString --- CHANGELOG.md | 18 ++++++++ Uri.php | 92 ++++++---------------------------------- UriTemplate/Operator.php | 36 +--------------- 3 files changed, 33 insertions(+), 113 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fbe4630d..06666351 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,24 @@ All Notable changes to `League\Uri` will be documented in this file +## Next - TBD + +### Added + +- None + +### Fixed + +- Using the `Encoder` class to normalize encoding and decoding in all packages + +### Deprecated + +- None + +### Removed + +- None + ## [7.0.0](https://github.com/thephpleague/uri/compare/6.8.0...7.0.0) - 2023-08-10 ### Added diff --git a/Uri.php b/Uri.php index 28f70d81..9b3ec82b 100644 --- a/Uri.php +++ b/Uri.php @@ -70,24 +70,6 @@ final class Uri implements UriInterface */ private const REGEXP_INVALID_CHARS = '/[\x00-\x1f\x7f]/'; - /** - * RFC3986 Sub delimiter characters regular expression pattern. - * - * @link https://tools.ietf.org/html/rfc3986#section-2.2 - * - * @var string - */ - private const REGEXP_CHARS_SUBDELIM = "\!\$&'\(\)\*\+,;\=%"; - - /** - * RFC3986 unreserved characters regular expression pattern. - * - * @link https://tools.ietf.org/html/rfc3986#section-2.3 - * - * @var string - */ - private const REGEXP_CHARS_UNRESERVED = 'A-Za-z\d_\-\.~'; - /** * RFC3986 schema regular expression pattern. * @@ -248,8 +230,8 @@ private function __construct( $this->port = $this->formatPort($port); $this->authority = $this->setAuthority(); $this->path = $this->formatPath($path); - $this->query = $this->formatQueryAndFragment($query); - $this->fragment = $this->formatQueryAndFragment($fragment); + $this->query = Encoder::encodeQueryOrFragment($query); + $this->fragment = Encoder::encodeQueryOrFragment($fragment); $this->assertValidState(); } @@ -280,27 +262,10 @@ private function formatUserInfo( ?string $user, #[SensitiveParameter] ?string $password ): ?string { - if (null === $user) { - return null; - } - - static $userPattern = '/[^%'.self::REGEXP_CHARS_UNRESERVED.self::REGEXP_CHARS_SUBDELIM.']++|%(?![A-Fa-f\d]{2})/'; - $user = preg_replace_callback($userPattern, Uri::urlEncodeMatch(...), $user); - if (null === $password) { - return $user; - } - - static $passwordPattern = '/[^%:'.self::REGEXP_CHARS_UNRESERVED.self::REGEXP_CHARS_SUBDELIM.']++|%(?![A-Fa-f\d]{2})/'; - - return $user.':'.preg_replace_callback($passwordPattern, Uri::urlEncodeMatch(...), $password); - } - - /** - * Returns the RFC3986 encoded string matched. - */ - private static function urlEncodeMatch(array $matches): string - { - return rawurlencode($matches[0]); + return match (true) { + null === $password => Encoder::encodeUser($user), + default => Encoder::encodeUser($user).':'.Encoder::encodePassword($password), + }; } /** @@ -720,21 +685,11 @@ private function setAuthority(): ?string */ private function formatPath(string $path): string { - if ('data' === $this->scheme) { - $path = $this->formatDataPath($path); - } - - if ('/' !== $path) { - static $pattern = '/[^'.self::REGEXP_CHARS_UNRESERVED.self::REGEXP_CHARS_SUBDELIM.':@\/}{]++|%(?![A-Fa-f\d]{2})/'; - - $path = (string) preg_replace_callback($pattern, Uri::urlEncodeMatch(...), $path); - } - - if ('file' === $this->scheme) { - return $this->formatFilePath($path); - } - - return $path; + return match (true) { + 'data' === $this->scheme => Encoder::encodePath($this->formatDataPath($path)), + 'file' === $this->scheme => $this->formatFilePath(Encoder::encodePath($path)), + default => Encoder::encodePath($path), + }; } /** @@ -827,27 +782,6 @@ private function formatFilePath(string $path): string ); } - /** - * Format the Query or the Fragment component. - * - * Returns a array containing: - * - */ - private function formatQueryAndFragment(?string $component): ?string - { - if (null === $component || '' === $component) { - return $component; - } - - static $pattern = '/[^'.self::REGEXP_CHARS_UNRESERVED.self::REGEXP_CHARS_SUBDELIM.':@\/?]++|%(?![A-Fa-f\d]{2})/'; - - return preg_replace_callback($pattern, self::urlEncodeMatch(...), $component); - } - /** * assert the URI internal state is valid. * @@ -1198,7 +1132,7 @@ public function withPath(Stringable|string $path): UriInterface public function withQuery(Stringable|string|null $query): UriInterface { - $query = $this->formatQueryAndFragment($this->filterString($query)); + $query = Encoder::encodeQueryOrFragment($this->filterString($query)); if ($query === $this->query) { return $this; } @@ -1212,7 +1146,7 @@ public function withQuery(Stringable|string|null $query): UriInterface public function withFragment(Stringable|string|null $fragment): UriInterface { - $fragment = $this->formatQueryAndFragment($this->filterString($fragment)); + $fragment = Encoder::encodeQueryOrFragment($this->filterString($fragment)); if ($fragment === $this->fragment) { return $this; } diff --git a/UriTemplate/Operator.php b/UriTemplate/Operator.php index 23ce95f5..375d4c24 100644 --- a/UriTemplate/Operator.php +++ b/UriTemplate/Operator.php @@ -13,6 +13,7 @@ namespace League\Uri\UriTemplate; +use League\Uri\Encoder; use League\Uri\Exceptions\SyntaxError; use Stringable; use function implode; @@ -32,24 +33,6 @@ */ enum Operator: string { - /** - * RFC3986 Sub delimiter characters regular expression pattern. - * - * @link https://tools.ietf.org/html/rfc3986#section-2.2 - * - * @var string - */ - private const REGEXP_CHARS_SUBDELIM = "\!\$&'\(\)\*\+,;\=%"; - - /** - * RFC3986 unreserved characters regular expression pattern. - * - * @link https://tools.ietf.org/html/rfc3986#section-2.3 - * - * @var string - */ - private const REGEXP_CHARS_UNRESERVED = 'A-Za-z\d_\-\.~'; - /** * Expression regular expression pattern. * @@ -104,7 +87,7 @@ public function isNamed(): bool public function decode(string $var): string { return match ($this) { - Operator::ReservedChars, Operator::Fragment => self::encodeQueryOrFragment($var), + Operator::ReservedChars, Operator::Fragment => (string) Encoder::encodeQueryOrFragment($var), default => rawurlencode($var), }; } @@ -238,19 +221,4 @@ private function replaceList(array $value, VarSpecifier $varSpec): array return [implode(',', $pairs), $useQuery]; } - - /** - * Returns the RFC3986 encoded string matched. - */ - private static function urlEncodeMatch(array $matches): string - { - return rawurlencode($matches[0]); - } - - private static function encodeQueryOrFragment(string $uriPart): string - { - static $pattern = '/[^'.self::REGEXP_CHARS_UNRESERVED.self::REGEXP_CHARS_SUBDELIM.':@\/?]++|%(?![A-Fa-f\d]{2})/'; - - return (string) preg_replace_callback($pattern, self::urlEncodeMatch(...), $uriPart); - } }