Skip to content

Commit

Permalink
Fix uk-to-en transliteration
Browse files Browse the repository at this point in the history
  • Loading branch information
fre5h committed Apr 12, 2022
1 parent 734d89a commit 7d88fa6
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 173 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ PHP library for transliteration. 🇺🇦 🇬🇧 🔡 🐘
<td>
Resolution of the Cabinet of Ministers of Ukraine №55 dated January 27, 2010
<br />
http://zakon1.rada.gov.ua/laws/show/55-2010-%D0%BF
https://zakon.rada.gov.ua/laws/show/55-2010-%D0%BF#Text
</td>
</tr>
</tbody>
Expand All @@ -54,7 +54,7 @@ use Fresh\Transliteration\UkrainianToEnglish;

class Foo
{
public function bar($text)
public function bar(string $text): void
{
// You can use it this way
$transliterator = new Transliterator();
Expand Down
8 changes: 4 additions & 4 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
"php": ">=8.1"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "^3.3",
"phpstan/phpstan": "^1.2.0",
"phpstan/phpstan-phpunit": "^1.0.0",
"friendsofphp/php-cs-fixer": "^3.8",
"phpstan/phpstan": "^1.5",
"phpstan/phpstan-phpunit": "^1.1",
"phpunit/phpunit": "^9.5",
"slam/phpstan-extensions": "^6.0",
"squizlabs/php_codesniffer": "^3.6",
"thecodingmachine/phpstan-strict-rules": "^1.0.0"
"thecodingmachine/phpstan-strict-rules": "^1.0"
},
"autoload": {
"psr-4": {
Expand Down
2 changes: 1 addition & 1 deletion phpunit.xml.dist
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>

<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="http://schema.phpunit.de/9.5/phpunit.xsd"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/9.5/phpunit.xsd"
colors="true"
bootstrap="vendor/autoload.php"
>
Expand Down
8 changes: 4 additions & 4 deletions src/RussianToEnglish.php
Original file line number Diff line number Diff line change
Expand Up @@ -94,19 +94,19 @@ class RussianToEnglish implements TransliteratorInterface
];

/**
* @param string $russianText
* @param string $textToTransliterate
*
* @return string
*/
public static function transliterate(string $russianText): string
public static function transliterate(string $textToTransliterate): string
{
$transliteratedText = '';

if ('' !== $russianText) {
if ('' !== $textToTransliterate) {
$transliteratedText = \str_replace(
\array_keys(self::RUSSIAN_TO_ENGLISH_RULES),
\array_values(self::RUSSIAN_TO_ENGLISH_RULES),
$russianText
$textToTransliterate
);
}

Expand Down
61 changes: 48 additions & 13 deletions src/UkrainianToEnglish.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ class UkrainianToEnglish implements TransliteratorInterface
'ґ' => 'g',
'д' => 'd',
'е' => 'e',
'є' => 'ie',
'є' => 'ye',
'ж' => 'zh',
'з' => 'z',
'и' => 'y',
'і' => 'i',
'ї' => 'i',
'й' => 'i',
'ї' => 'yi',
'й' => 'y',
'к' => 'k',
'л' => 'l',
'м' => 'm',
Expand All @@ -90,41 +90,76 @@ class UkrainianToEnglish implements TransliteratorInterface
'ш' => 'sh',
'щ' => 'shch',
'ь' => '',
'ю' => 'yu',
'я' => 'ya',
'\'' => '',
];

// Replacements for characters that are transliterated differently inside a word.
// processExceptionsForVowelsInsideWords() substitutes these values when the
// character is not the first one in the text and the character before it is a
// key of UKRAINIAN_TO_ENGLISH_RULES. Despite the name, the map also covers 'й'
// and the apostrophe (which is dropped). Only lowercase forms are listed.
private const VOWEL_EXCEPTIONS = [
'є' => 'ie',
'ї' => 'i',
'й' => 'i',
'ю' => 'iu',
'я' => 'ia',
'\'' => '',
];

/**
* @param string $ukrainianText
* @param string $textToTransliterate
*
* @return string
*/
public static function transliterate(string $ukrainianText): string
public static function transliterate(string $textToTransliterate): string
{
$transliteratedText = '';

if ('' !== $ukrainianText) {
if (self::checkForZghException($ukrainianText)) {
$ukrainianText = \str_replace(['Зг', 'зг'], ['Zgh', 'zgh'], $ukrainianText);
if ('' !== $textToTransliterate) {
if (self::checkForZghException($textToTransliterate)) {
$textToTransliterate = \str_replace(['Зг', 'зг'], ['Zgh', 'zgh'], $textToTransliterate);
}

if (1 === \preg_match('/[єюїйя]/u', $textToTransliterate)) {
$textToTransliterate = self::processExceptionsForVowelsInsideWords($textToTransliterate);
}

$transliteratedText = \str_replace(
\array_keys(self::UKRAINIAN_TO_ENGLISH_RULES),
\array_values(self::UKRAINIAN_TO_ENGLISH_RULES),
$ukrainianText
$textToTransliterate
);
}

return $transliteratedText;
}

/**
* @param string $ukrainianText
* @param string $textToTransliterate
*
* @return string
*/
private static function processExceptionsForVowelsInsideWords(string $textToTransliterate): string
{
    // Purpose: replace every character listed in VOWEL_EXCEPTIONS with its
    // "inside a word" form when it directly follows a character that is a key
    // of UKRAINIAN_TO_ENGLISH_RULES. All other characters are returned as-is
    // so that the caller can apply the general rules afterwards.

    // Keep an untouched copy of the input for the look-behind. Checking the
    // array that is being rewritten would compare against an already
    // substituted Latin string (or '' for the apostrophe), so the second of
    // two consecutive exception characters (e.g. "єї", "'я") would wrongly be
    // treated as standing at the beginning of a word.
    $originalCharacters = \mb_str_split($textToTransliterate);
    $characters = $originalCharacters;

    foreach ($originalCharacters as $i => $character) {
        // The very first character has no predecessor, hence it is never "inside" a word.
        if (0 === $i || !\array_key_exists($character, self::VOWEL_EXCEPTIONS)) {
            continue;
        }

        // Inside a word == the original preceding character is covered by the transliteration rules.
        if (\array_key_exists($originalCharacters[$i - 1], self::UKRAINIAN_TO_ENGLISH_RULES)) {
            $characters[$i] = self::VOWEL_EXCEPTIONS[$character];
        }
    }

    return \implode('', $characters);
}

/**
* @param string $textToTransliterate
*
* @return bool
*/
private static function checkForZghException(string $ukrainianText): bool
private static function checkForZghException(string $textToTransliterate): bool
{
return (bool) \mb_substr_count($ukrainianText, 'Зг') || (bool) \mb_substr_count($ukrainianText, 'зг');
return (bool) \mb_substr_count($textToTransliterate, 'Зг') || (bool) \mb_substr_count($textToTransliterate, 'зг');
}
}
7 changes: 6 additions & 1 deletion tests/RussianToEnglishTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ protected function setUp(): void
$this->transliterator = new Transliterator();
}

/**
 * Drops the transliterator property that setUp() assigns, so the instance
 * is not carried over once a test has finished.
 */
protected function tearDown(): void
{
unset($this->transliterator);
}

/**
* @dataProvider alphabetProvider
*/
Expand All @@ -39,7 +44,7 @@ public function testTransliterationFromRussianToEnglish(string $russianText, str
self::assertEquals($transliteratedText, $this->transliterator->ruToEn($russianText));
}

public function alphabetProvider(): iterable
public static function alphabetProvider(): iterable
{
yield ['А', 'A'];
yield ['Б', 'B'];
Expand Down
Loading

0 comments on commit 7d88fa6

Please sign in to comment.