From ec441588a8b5dd289ef4aa9b98835cc67b28122a Mon Sep 17 00:00:00 2001 From: nnatter Date: Tue, 3 Nov 2020 14:13:37 +0100 Subject: [PATCH] Allow valid XML characters outside of BMP when storing property if jackrabbit version is good enough --- CHANGELOG.md | 9 ++++ src/Jackalope/RepositoryFactoryJackrabbit.php | 4 ++ src/Jackalope/Transport/Jackrabbit/Client.php | 49 ++++++++++++++++++- .../Transport/Jackrabbit/ClientTest.php | 29 ++++++----- 4 files changed, 76 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e1b7483..5dbbfcc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ Changelog ========= +1.4.2 +----- + +* Added the repository factory option `jackalope.jackrabbit_version` to allow + specifying the version of the Jackrabbit backend. The version is used to + decide if the backend supports full UTF-8 for node names (so you can use + emojis in node and property names). The minimum Jackrabbit version supporting + emojis is 2.18.0. + 1.4.1 ----- diff --git a/src/Jackalope/RepositoryFactoryJackrabbit.php b/src/Jackalope/RepositoryFactoryJackrabbit.php index 455f37c4..d67446fc 100644 --- a/src/Jackalope/RepositoryFactoryJackrabbit.php +++ b/src/Jackalope/RepositoryFactoryJackrabbit.php @@ -45,6 +45,7 @@ class RepositoryFactoryJackrabbit implements RepositoryFactoryInterface Session::OPTION_AUTO_LASTMODIFIED => 'boolean: Whether to automatically update nodes having mix:lastModified. 
Defaults to true.', 'jackalope.jackrabbit_force_http_version_10' => 'boolean: Force HTTP version 1.0, this can in solving problems with curl such as https://github.com/jackalope/jackalope-jackrabbit/issues/89', 'jackalope.jackrabbit_curl_options' => 'array: Additional global curl-options', + 'jackalope.jackrabbit_version' => 'string: Set the version of the jackrabbit server to allow the client to offer better functionality if possible', ); /** @@ -96,6 +97,9 @@ public function getRepository(array $parameters = null) if (isset($parameters['jackalope.check_login_on_server'])) { $transport->setCheckLoginOnServer($parameters['jackalope.check_login_on_server']); } + if (isset($parameters['jackalope.jackrabbit_version'])) { + $transport->setVersion($parameters['jackalope.jackrabbit_version']); + } if (isset($parameters['jackalope.logger'])) { $transport = $factory->get( 'Transport\Jackrabbit\LoggingClient', diff --git a/src/Jackalope/Transport/Jackrabbit/Client.php b/src/Jackalope/Transport/Jackrabbit/Client.php index 3ded836c..ecbc4de1 100644 --- a/src/Jackalope/Transport/Jackrabbit/Client.php +++ b/src/Jackalope/Transport/Jackrabbit/Client.php @@ -69,10 +69,19 @@ class Client extends BaseTransport implements JackrabbitClientInterface { /** - * minimal version needed for the backend server + * Minimal version requirement for the Jackrabbit backend server. */ const VERSION = "2.3.6"; + /** + * Minimal version of the Jackrabbit backend server known to support + * storing unicode symbols outside of UTF-8 basic multilingual plane. + * + * Note: We are not sure which exact version of Jackrabbit introduced support + * for full UTF-8 symbols. This is the lowest version known to work. + */ + const UTF8_SUPPORT_MINIMAL_VERSION = "2.18.0"; + /** * Description of the namspace to be used for communication with the server. 
* @var string @@ -214,6 +223,13 @@ class Client extends BaseTransport implements JackrabbitClientInterface */ private $curlOptions = array(); + /** + * Version of the Jackrabbit server as declared in the configuration. + * + * @var string|null + */ + private $version = null; + /** * Create a transport pointing to a server url. * @@ -455,6 +471,25 @@ public function getRepositoryDescriptors() $this->descriptors['jcr.repository.version']. '". Need at least "'.self::VERSION.'"'); } + + + if ($this->version) { + // Sanity check if the configured version has the same major and minor number as the version reported by the backend. + $serverVersion = implode('.', array_slice(explode('.', $this->descriptors['jcr.repository.version']), 0, 2)); + $configuredVersion = implode('.', array_slice(explode('.', $this->version), 0, 2)); + + if (!version_compare($serverVersion, $configuredVersion, '==')) { + trigger_error( + sprintf( + 'Version mismatch between configured version %s and version %s reported by the backend at %s.', + $this->version, + $this->descriptors['jcr.repository.version'], + $this->server + ), + E_USER_NOTICE + ); + } + } } return $this->descriptors; @@ -1319,7 +1354,7 @@ private function storeProperty(Property $property) * If occurrence is found, returns false, otherwise true. * Invalid characters were taken from this list: http://en.wikipedia.org/wiki/Valid_characters_in_XML#XML_1.0 * - * Uses regexp mentioned here: http://stackoverflow.com/a/961504 + * Uses regexp built upon: http://stackoverflow.com/a/961504, https://stackoverflow.com/a/30240915 * * @param $string string value * @return bool true if string is OK, false otherwise. 
@@ -1328,6 +1363,11 @@ protected function isStringValid($string) { $regex = '/[^\x{9}\x{a}\x{d}\x{20}-\x{D7FF}\x{E000}-\x{FFFD}]+/u'; + if ($this->version && version_compare($this->version, self::UTF8_SUPPORT_MINIMAL_VERSION, '>=')) { + // unicode symbols outside of bmp such as emojis are supported only by recent jackrabbit versions + $regex = '/[^\x{9}\x{a}\x{d}\x{20}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]+/u'; + } + return (preg_match($regex, $string, $matches) === 0); } @@ -2171,4 +2211,9 @@ protected function getMimePart($name, $value, $mime_boundary) return $data; } + + public function setVersion($version) + { + $this->version = $version; + } } diff --git a/tests/Jackalope/Transport/Jackrabbit/ClientTest.php b/tests/Jackalope/Transport/Jackrabbit/ClientTest.php index dfa31a19..06208402 100644 --- a/tests/Jackalope/Transport/Jackrabbit/ClientTest.php +++ b/tests/Jackalope/Transport/Jackrabbit/ClientTest.php @@ -555,7 +555,7 @@ public function deleteNodesProvider(): array /** * @dataProvider provideTestOutOfRangeCharacters */ - public function testOutOfRangeCharacterOccurrence($string, $isValid): void + public function testOutOfRangeCharacterOccurrence($string, $version, $isValid): void { if (false === $isValid) { $this->expectException(ValueFormatException::class); @@ -563,6 +563,7 @@ public function testOutOfRangeCharacterOccurrence($string, $isValid): void } $t = $this->getTransportMock(); + $t->setVersion($version); $factory = new Factory; $session = $this->createMock(Session::class); @@ -596,18 +597,20 @@ public function provideTestOutOfRangeCharacters(): array { // use http://rishida.net/tools/conversion/ to convert problematic utf-16 strings to code points return array( - array('This is valid too!'.$this->translateCharFromCode('\u0009'), true), - array('This is valid', true), - array($this->translateCharFromCode('\uD7FF'), true), - array('This is on the edge, but valid too.'. 
$this->translateCharFromCode('\uFFFD'), true), - array($this->translateCharFromCode('\u10000'), true), - array($this->translateCharFromCode('\u10FFFF'), true), - array($this->translateCharFromCode('\u0001'), false), - array($this->translateCharFromCode('\u0002'), false), - array($this->translateCharFromCode('\u0003'), false), - array($this->translateCharFromCode('\u0008'), false), - array($this->translateCharFromCode('\uFFFF'), false), - array($this->translateCharFromCode('Sporty Spice at Sporty spice @ work \uD83D\uDCAA\uD83D\uDCAA\uD83D\uDCAA'), false), + array('This is valid too!'.$this->translateCharFromCode('\u0009'), null, true), + array('This is valid', null, true), + array($this->translateCharFromCode('\uD7FF'), null, true), + array('This is on the edge, but valid too.'. $this->translateCharFromCode('\uFFFD'), null, true), + array($this->translateCharFromCode('\u10000'), null, true), + array($this->translateCharFromCode('\u10FFFF'), null, true), + array($this->translateCharFromCode('\u0001'), null, false), + array($this->translateCharFromCode('\u0002'), null, false), + array($this->translateCharFromCode('\u0003'), null, false), + array($this->translateCharFromCode('\u0008'), null, false), + array($this->translateCharFromCode('\uFFFF'), null, false), + array($this->translateCharFromCode('Sporty Spice at Sporty spice @ work \uD83D\uDCAA\uD83D\uDCAA\uD83D\uDCAA'), null, false), + array($this->translateCharFromCode('Sporty Spice at Sporty spice @ work \uD83D\uDCAA\uD83D\uDCAA\uD83D\uDCAA'), '2.8.0', false), + array($this->translateCharFromCode('Sporty Spice at Sporty spice @ work \uD83D\uDCAA\uD83D\uDCAA\uD83D\uDCAA'), '2.18.1', true), ); }