diff --git a/app/Config/Feature.php b/app/Config/Feature.php index 35024d357919..ec1435af413f 100644 --- a/app/Config/Feature.php +++ b/app/Config/Feature.php @@ -26,4 +26,12 @@ class Feature extends BaseConfig * If false, `limit(0)` returns no records. (the behavior of 3.1.9 or later in version 3.x.) */ public bool $limitZeroAsAll = true; + + /** + * Use strict locale negotiation. + * + * By default, the locale is selected based on a loose comparison of the language code (ISO 639-1). + * Enabling strict comparison will also consider the region code (ISO 3166-1 alpha-2). + */ + public bool $strictLocaleNegotiation = false; } diff --git a/system/HTTP/Negotiate.php b/system/HTTP/Negotiate.php index 33938c8f0741..026cafc75f39 100644 --- a/system/HTTP/Negotiate.php +++ b/system/HTTP/Negotiate.php @@ -14,6 +14,7 @@ namespace CodeIgniter\HTTP; use CodeIgniter\HTTP\Exceptions\HTTPException; +use Config\Feature; /** * Class Negotiate @@ -122,11 +123,15 @@ public function encoding(array $supported = []): string * types the application says it supports, and the types requested * by the client. * - * If no match is found, the first, highest-ranking client requested + * If strict locale negotiation is disabled and no match is found, the first, highest-ranking client requested * type is returned. */ public function language(array $supported): string { + if (config(Feature::class)->strictLocaleNegotiation) { + return $this->getBestLocaleMatch($supported, $this->request->getHeaderLine('accept-language')); + } + return $this->getBestMatch($supported, $this->request->getHeaderLine('accept-language'), false, false, true); } @@ -189,6 +194,69 @@ protected function getBestMatch( return $strictMatch ? '' : $supported[0]; } + /** + * Try to find the best matching locale. It supports strict locale comparison. + * + * If Config\App::$supportedLocales have "en-US" and "en-GB" locales, they can be recognized + * as two different locales. 
This method checks first for the strict match, then falls back + * to the most general locale (in this case "en") ISO 639-1 and finally to the locale variant + * "en-*" (ISO 639-1 plus "wildcard" for ISO 3166-1 alpha-2). + * + * If nothing from above is matched, then it returns the first option from the $supportedLocales array. + * + * @param list<string> $supportedLocales App-supported values + * @param ?string $header Compatible 'Accept-Language' header string + */ + protected function getBestLocaleMatch(array $supportedLocales, ?string $header): string + { + if ($supportedLocales === []) { + throw HTTPException::forEmptySupportedNegotiations(); + } + + if ($header === null || $header === '') { + return $supportedLocales[0]; + } + + $acceptable = $this->parseHeader($header); + $fallbackLocales = []; + + foreach ($acceptable as $accept) { + // if acceptable quality is zero, skip it. + if ($accept['q'] === 0.0) { + continue; + } + + // if acceptable value is "anything", return the first available + if ($accept['value'] === '*') { + return $supportedLocales[0]; + } + + // look for exact match + if (in_array($accept['value'], $supportedLocales, true)) { + return $accept['value']; + } + + // set a fallback locale + $fallbackLocales[] = strtok($accept['value'], '-'); + } + + foreach ($fallbackLocales as $fallbackLocale) { + // look for exact match + if (in_array($fallbackLocale, $supportedLocales, true)) { + return $fallbackLocale; + } + + // look for regional locale match + foreach ($supportedLocales as $locale) { + if (str_starts_with($locale, $fallbackLocale . '-')) { + return $locale; + } + } + } + + return $supportedLocales[0]; + } + /** * Parses an Accept* header into it's multiple values. 
* diff --git a/tests/system/HTTP/NegotiateTest.php b/tests/system/HTTP/NegotiateTest.php index 18d546c0a9c6..a2731ec5ddbf 100644 --- a/tests/system/HTTP/NegotiateTest.php +++ b/tests/system/HTTP/NegotiateTest.php @@ -16,6 +16,7 @@ use CodeIgniter\HTTP\Exceptions\HTTPException; use CodeIgniter\Test\CIUnitTestCase; use Config\App; +use Config\Feature; use PHPUnit\Framework\Attributes\Group; /** @@ -111,11 +112,23 @@ public function testNegotiatesEncodingBasics(): void public function testAcceptLanguageBasics(): void { - $this->request->setHeader('Accept-Language', 'da, en-gb;q=0.8, en;q=0.7'); + $this->request->setHeader('Accept-Language', 'da, en-gb, en-us;q=0.8, en;q=0.7'); $this->assertSame('da', $this->negotiate->language(['da', 'en'])); $this->assertSame('en-gb', $this->negotiate->language(['en-gb', 'en'])); $this->assertSame('en', $this->negotiate->language(['en'])); + + // Will find the first locale instead of "en-gb" + $this->assertSame('en-us', $this->negotiate->language(['en-us', 'en-gb', 'en'])); + $this->assertSame('en', $this->negotiate->language(['en', 'en-us', 'en-gb'])); + + config(Feature::class)->strictLocaleNegotiation = true; + + $this->assertSame('da', $this->negotiate->language(['da', 'en'])); + $this->assertSame('en-gb', $this->negotiate->language(['en-gb', 'en'])); + $this->assertSame('en', $this->negotiate->language(['en'])); + $this->assertSame('en-gb', $this->negotiate->language(['en-us', 'en-gb', 'en'])); + $this->assertSame('en-gb', $this->negotiate->language(['en', 'en-us', 'en-gb'])); } /** @@ -125,7 +138,19 @@ public function testAcceptLanguageMatchesBroadly(): void { $this->request->setHeader('Accept-Language', 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7'); - $this->assertSame('fr', $this->negotiate->language(['fr', 'en'])); + $this->assertSame('fr', $this->negotiate->language(['fr', 'fr-FR', 'en'])); + $this->assertSame('fr-FR', $this->negotiate->language(['fr-FR', 'fr', 'en'])); + $this->assertSame('fr-BE', 
$this->negotiate->language(['fr-BE', 'fr', 'en'])); + $this->assertSame('en', $this->negotiate->language(['en', 'en-US'])); + $this->assertSame('fr-BE', $this->negotiate->language(['ru', 'en-GB', 'fr-BE'])); + + config(Feature::class)->strictLocaleNegotiation = true; + + $this->assertSame('fr-FR', $this->negotiate->language(['fr', 'fr-FR', 'en'])); + $this->assertSame('fr-FR', $this->negotiate->language(['fr-FR', 'fr', 'en'])); + $this->assertSame('fr', $this->negotiate->language(['fr-BE', 'fr', 'en'])); + $this->assertSame('en-US', $this->negotiate->language(['en', 'en-US'])); + $this->assertSame('fr-BE', $this->negotiate->language(['ru', 'en-GB', 'fr-BE'])); } public function testBestMatchEmpty(): void diff --git a/user_guide_src/source/changelogs/v4.6.0.rst b/user_guide_src/source/changelogs/v4.6.0.rst index 2c75473b5220..db4e8abaa07b 100644 --- a/user_guide_src/source/changelogs/v4.6.0.rst +++ b/user_guide_src/source/changelogs/v4.6.0.rst @@ -239,6 +239,13 @@ Routing - Now you can specify multiple hostnames when restricting routes. +Negotiator +========== + +- Added a feature flag ``Feature::$strictLocaleNegotiation`` to enable strict locale comparison. + Previously, a request with the header ``Accept-Language: en-US,en-GB;q=0.9`` could return the first allowed language ``en`` instead of the exact language ``en-US`` or ``en-GB``. + Set the value to ``true`` to enable comparison not only by language code ('en' - ISO 639-1) but also by regional code ('en-US' - ISO 639-1 plus ISO 3166-1 alpha-2). + Testing ======= diff --git a/user_guide_src/source/incoming/content_negotiation.rst b/user_guide_src/source/incoming/content_negotiation.rst index 702bb40afe64..ba176feaebda 100644 --- a/user_guide_src/source/incoming/content_negotiation.rst +++ b/user_guide_src/source/incoming/content_negotiation.rst @@ -102,6 +102,49 @@ and German you would do something like: In this example, 'en' would be returned as the current language. 
If no match is found, it will return the first element in the ``$supported`` array, so that should always be the preferred language. +Strict Locale Negotiation +------------------------- + +.. versionadded:: 4.6.0 + +By default, locale is determined on a loose comparison basis. So only the first part of the locale string is taken +into account (language). This is usually sufficient. But sometimes we want to be able to distinguish between regional versions such as +``en-US`` and ``en-GB`` to serve different content. + +For such cases, we have introduced a new setting that can be enabled via ``Config\Feature::$strictLocaleNegotiation``. This ensures +that a strict comparison is attempted first. + +.. note:: + + CodeIgniter comes with translations only for primary language tags ('en', 'fr', etc.). So if you enable this feature and your + settings in ``Config\App::$supportedLocales`` include regional language tags ('en-US', 'fr-FR', etc.), then keep in mind that + if you have your own translation files, you **must also change** the folder names for CodeIgniter's translation files to match + what you put in the ``$supportedLocales`` array. + +Now let's consider the below example. The browser's preferred language will be set as this:: + + GET /foo HTTP/1.1 + Accept-Language: fr; q=1.0, en-GB; q=0.5 + +In this example, the browser would prefer French, with a second choice of English (United Kingdom). Your website, on the other hand, +supports German and English (United States): + +.. literalinclude:: content_negotiation/008.php + +In this example, 'en-US' would be returned as the current language. If no match is found, it will return the first element +in the ``$supported`` array. Here is exactly how the locale selection process works. + +Even though 'fr' is preferred by the browser, it is not in our ``$supported`` array. The same problem occurs with 'en-GB', but here +we will be able to search for variants. 
First, we will fall back to the most general locale (in this case 'en') which again is not in our +array. Then we will search for the regional locale 'en-'. And that's when our value from the ``$supported`` array will be matched. +We will return 'en-US'. + +So the process of selecting a locale is as follows: + +#. strict match ('en-GB') - ISO 639-1 plus ISO 3166-1 alpha-2 +#. general locale match ('en') - ISO 639-1 +#. regional locale match ('en-') - ISO 639-1 plus "wildcard" for ISO 3166-1 alpha-2 + Encoding ======== diff --git a/user_guide_src/source/incoming/content_negotiation/008.php b/user_guide_src/source/incoming/content_negotiation/008.php new file mode 100644 index 000000000000..2b4a67f1a6d0 --- /dev/null +++ b/user_guide_src/source/incoming/content_negotiation/008.php @@ -0,0 +1,10 @@ +<?php + +$supported = [ + 'de', + 'en-US', +]; + +$lang = $request->negotiate('language', $supported); +// or +$lang = $negotiate->language($supported); diff --git a/user_guide_src/source/installation/upgrade_458.rst b/user_guide_src/source/installation/upgrade_458.rst index e8e8e681abe4..3a0a77f13fb4 100644 --- a/user_guide_src/source/installation/upgrade_458.rst +++ b/user_guide_src/source/installation/upgrade_458.rst @@ -52,4 +52,4 @@ All Changes This is a list of all files in the **project space** that received changes; many will be simple comments or formatting that have no effect on the runtime: -- @TODO +- @TODO \ No newline at end of file diff --git a/user_guide_src/source/installation/upgrade_460.rst b/user_guide_src/source/installation/upgrade_460.rst index a06729a30f29..57554c63b7bc 100644 --- a/user_guide_src/source/installation/upgrade_460.rst +++ b/user_guide_src/source/installation/upgrade_460.rst @@ -211,6 +211,7 @@ Config - app/Config/Feature.php - ``Config\Feature::$autoRoutesImproved`` has been changed to ``true``. + - ``Config\Feature::$strictLocaleNegotiation`` has been added. - app/Config/Routing.php - ``Config\Routing::$translateUriToCamelCase`` has been changed to ``true``. 
@@ -220,4 +221,4 @@ All Changes This is a list of all files in the **project space** that received changes; many will be simple comments or formatting that have no effect on the runtime: -- @TODO +- app/Config/Feature.php \ No newline at end of file