From b3a4659164d988c35ff9bfb3fd4be9f1b365def7 Mon Sep 17 00:00:00 2001 From: Aleksey Kuznietsov Date: Fri, 13 Sep 2024 12:31:49 -0400 Subject: [PATCH 1/4] define UTF8_SPACES + minor optimizations define UTF8_SPACES to be able to add more characters --- src/mb_trim.php | 61 +++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/src/mb_trim.php b/src/mb_trim.php index 908a43a..24b5920 100644 --- a/src/mb_trim.php +++ b/src/mb_trim.php @@ -1,4 +1,5 @@ = 80200 && ($encoding === null || $encoding === "UTF-8") && $characters === " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}") { + if (PHP_VERSION_ID >= 80200 && ($encoding === null || $encoding === 'UTF-8') && $characters === UTF8_SPACES) { return preg_replace("/^[\s\0]+|[\s\0]+$/uD", '', $string); } @@ -21,26 +22,24 @@ function mb_trim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u throw new ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given', __FUNCTION__, $encoding)); } - if ($characters === "") { + if ($characters === '') { return $string; } if ($encoding !== null && $encoding !== 'UTF-8') { - $string = mb_convert_encoding($string, "UTF-8", $encoding); - $characters = mb_convert_encoding($characters, "UTF-8", $encoding); + $string = mb_convert_encoding($string, 'UTF-8', $encoding); + $characters = mb_convert_encoding($characters, 'UTF-8', $encoding); } $charMap = array_map(static fn(string $char): string => preg_quote($char, '/'), mb_str_split($characters)); $regexClass = implode('', $charMap); - $regex = "/^[" . $regexClass . "]+|[" . $regexClass . "]+$/uD"; + $regex = '/^[' . $regexClass . ']+|[' . $regexClass . ']+$/uD'; $return = preg_replace($regex, '', $string); - if ($encoding !== null && $encoding !== 'UTF-8') { - $return = mb_convert_encoding($return, $encoding, "UTF-8"); - } - - return $return; + return $encoding !== null && $encoding !== 'UTF-8' + ? mb_convert_encoding($return, $encoding, 'UTF-8') + : $return; } /** @@ -52,9 +51,9 @@ function mb_trim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u * * @return string The trimmed string. */ -function mb_ltrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string { +function mb_ltrim(string $string, string $characters = UTF8_SPACES, ?string $encoding = null): string { // On supported versions, use a pre-calculated regex for performance. - if (PHP_VERSION_ID >= 80200 && ($encoding === null || $encoding === "UTF-8") && $characters === " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}") { + if (PHP_VERSION_ID >= 80200 && ($encoding === null || $encoding === 'UTF-8') && $characters === UTF8_SPACES) { return preg_replace("/^[\s\0]+/u", '', $string); } @@ -64,26 +63,24 @@ function mb_ltrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\ throw new ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given', __FUNCTION__, $encoding)); } - if ($characters === "") { + if ($characters === '') { return $string; } if ($encoding !== null && $encoding !== 'UTF-8') { - $string = mb_convert_encoding($string, "UTF-8", $encoding); - $characters = mb_convert_encoding($characters, "UTF-8", $encoding); + $string = mb_convert_encoding($string, 'UTF-8', $encoding); + $characters = mb_convert_encoding($characters, 'UTF-8', $encoding); } $charMap = array_map(static fn(string $char): string => preg_quote($char, '/'), mb_str_split($characters)); $regexClass = implode('', $charMap); - $regex = "/^[" . $regexClass . "]+/u"; + $regex = '/^[' . $regexClass . ']+/u'; $return = preg_replace($regex, '', $string); - if ($encoding !== null && $encoding !== 'UTF-8') { - $return = mb_convert_encoding($return, $encoding, "UTF-8"); - } - - return $return; + return $encoding !== null && $encoding !== 'UTF-8' + ? mb_convert_encoding($return, $encoding, 'UTF-8') + : $return; } /** @@ -95,9 +92,9 @@ function mb_ltrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\ * * @return string The trimmed string. */ -function mb_rtrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string { +function mb_rtrim(string $string, string $characters = UTF8_SPACES, ?string $encoding = null): string { // On supported versions, use a pre-calculated regex for performance. - if (PHP_VERSION_ID >= 80200 && ($encoding === null || $encoding === "UTF-8") && $characters === " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}") { + if (PHP_VERSION_ID >= 80200 && ($encoding === null || $encoding === 'UTF-8') && $characters === UTF8_SPACES) { return preg_replace("/[\s\0]+$/uD", '', $string); } @@ -107,24 +104,22 @@ function mb_rtrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\ throw new ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given', __FUNCTION__, $encoding)); } - if ($characters === "") { + if ($characters === '') { return $string; } if ($encoding !== null && $encoding !== 'UTF-8') { - $string = mb_convert_encoding($string, "UTF-8", $encoding); - $characters = mb_convert_encoding($characters, "UTF-8", $encoding); + $string = mb_convert_encoding($string, 'UTF-8', $encoding); + $characters = mb_convert_encoding($characters, 'UTF-8', $encoding); } $charMap = array_map(static fn(string $char): string => preg_quote($char, '/'), mb_str_split($characters)); $regexClass = implode('', $charMap); - $regex = "/[" . $regexClass . "]+$/uD"; + $regex = '/[' . $regexClass . ']+$/uD'; $return = preg_replace($regex, '', $string); - if ($encoding !== null && $encoding !== 'UTF-8') { - $return = mb_convert_encoding($return, $encoding, "UTF-8"); - } - - return $return; + return $encoding !== null && $encoding !== 'UTF-8' + ? mb_convert_encoding($return, $encoding, 'UTF-8') + : $return; } From 1f873efc39f9ca0c648c349a7b22e8c7986d57f2 Mon Sep 17 00:00:00 2001 From: Aleksey Kuznietsov Date: Fri, 13 Sep 2024 12:33:56 -0400 Subject: [PATCH 2/4] Comment on UTF8_SPACES --- src/mb_trim.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mb_trim.php b/src/mb_trim.php index 24b5920..86c8424 100644 --- a/src/mb_trim.php +++ b/src/mb_trim.php @@ -1,4 +1,6 @@ Date: Fri, 13 Sep 2024 12:43:51 -0400 Subject: [PATCH 3/4] Update mb_trim.php --- src/mb_trim.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mb_trim.php b/src/mb_trim.php index 86c8424..d3d0a22 100644 --- a/src/mb_trim.php +++ b/src/mb_trim.php @@ -1,5 +1,5 @@ Date: Fri, 13 Sep 2024 14:25:28 -0400 Subject: [PATCH 4/4] Update MbTrimTest.php --- tests/MbTrimTest.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/MbTrimTest.php b/tests/MbTrimTest.php index 123fbe9..7df2969 100644 --- a/tests/MbTrimTest.php +++ b/tests/MbTrimTest.php @@ -71,6 +71,9 @@ public function testMbTrim(): void { $this->assertSame("foo\n", mb_trim("foo\n", "o")); $this->assertSame("foo\n", mb_rtrim("foo\n", "o")); + // AK 2024-09-13: please test this one too! + //$this->assertSame(' —888-888-00-00-', mb_trim(' —888-888-00-00-', UTF8_SPACES.'-–—')); + $this->expectException(\ValueError::class); mb_trim( "\u{180F}", "", "NULL"); }