Skip to content

Commit

Permalink
Add extensive alpha3 languages from ISO639-3 (#71)
Browse files Browse the repository at this point in the history
  • Loading branch information
PrinsFrank authored Oct 28, 2023
1 parent 7bde634 commit f6a4bbd
Show file tree
Hide file tree
Showing 6 changed files with 8,028 additions and 1 deletion.
33 changes: 33 additions & 0 deletions .github/workflows/update-spec-language-extensive.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Regenerates the extensive language spec and proposes the result as a pull request.
name: Daily Language Extensive spec update

on:
  # Allow manual runs from the Actions tab in addition to the schedule below.
  workflow_dispatch:
  schedule:
    - cron: '55 17 * * *'

jobs:
  update-specs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install dependencies
        run: composer install
      - name: Run language extensive update command
        run: composer update-spec-language-extensive
      # Normalise the formatting of the regenerated files before they are committed.
      - name: Run CSFixer
        run: composer cs:fix
      - name: Create Pull Request
        id: cpr
        uses: peter-evans/create-pull-request@v4
        with:
          commit-message: Automatic Language Extensive spec update from upstream changes
          delete-branch: true
          title: 'Automatic Language Extensive spec update'
          branch: update-spec/language-extensive
          body: |
            This PR makes sure the content of this package is updated with upstream changes in the specs.
          labels: |
            spec-update
            automated pr
          reviewers: prinsfrank
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"update-spec-currency": "@update-spec PrinsFrank\\Standards\\Dev\\DataSource\\Mapping\\CurrencyMapping",
"update-spec-http-methods": "@update-spec PrinsFrank\\Standards\\Dev\\DataSource\\Mapping\\HttpMethodMapping",
"update-spec-http-status-codes": "@update-spec PrinsFrank\\Standards\\Dev\\DataSource\\Mapping\\HttpStatusCodeMapping",
"update-spec-language-extensive": "@update-spec PrinsFrank\\Standards\\Dev\\DataSource\\Mapping\\LanguageExtensiveMapping",
"update-spec-language": "@update-spec PrinsFrank\\Standards\\Dev\\DataSource\\Mapping\\LanguageMapping",
"update-spec-scripts": "@update-spec PrinsFrank\\Standards\\Dev\\DataSource\\Mapping\\ScriptMapping",
"detect-drivers": "vendor/bin/bdi detect drivers"
Expand Down
2 changes: 2 additions & 0 deletions dev/DataSource/DataSourceMappingProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

namespace PrinsFrank\Standards\Dev\DataSource;

use PrinsFrank\Standards\Dev\DataSource\Mapping\LanguageExtensiveMapping;
use PrinsFrank\Standards\Dev\DataSource\Mapping\CountryMapping;
use PrinsFrank\Standards\Dev\DataSource\Mapping\CurrencyMapping;
use PrinsFrank\Standards\Dev\DataSource\Mapping\HttpMethodMapping;
Expand All @@ -24,6 +25,7 @@ public function provide(): array
CurrencyMapping::class,
HttpMethodMapping::class,
HttpStatusCodeMapping::class,
LanguageExtensiveMapping::class,
LanguageMapping::class,
ScriptMapping::class,
];
Expand Down
70 changes: 70 additions & 0 deletions dev/DataSource/Mapping/LanguageExtensiveMapping.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?php
declare(strict_types=1);

namespace PrinsFrank\Standards\Dev\DataSource\Mapping;

use PrinsFrank\Standards\Dev\DataSource\Sorting\SortingInterface;
use PrinsFrank\Standards\Dev\DataSource\Sorting\ValueWithDeprecatedTagsSeparateSorting;
use PrinsFrank\Standards\Dev\DataTarget\EnumCase;
use PrinsFrank\Standards\Dev\DataTarget\EnumFile;
use PrinsFrank\Standards\Language\LanguageAlpha3Extensive;
use Symfony\Component\Panther\Client;
use Symfony\Component\Panther\DomCrawler\Crawler;

/**
 * Data source mapping that turns the tab-separated ISO 639-3 code table into
 * cases of the LanguageAlpha3Extensive enum.
 *
 * @template TDataSet of object{Id: string, Part2B: string, Part2T: string, Part1: string, Scope: string, LanguageType: string, RefName: string, Comment: string}
 * @implements Mapping<TDataSet>
 */
class LanguageExtensiveMapping implements Mapping
{
    /** Number of tab-separated columns read from every data row (Id through Comment). */
    private const COLUMN_COUNT = 8;

    /** Reference names that are deliberately left out of the generated enum. */
    private const SKIPPED_REF_NAMES = ['Fa D\'ambu', 'C\'lela'];

    /**
     * Location of the upstream tab-separated code table.
     */
    public static function url(): string
    {
        return 'https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Latin1.tab';
    }

    /**
     * Parses the page source line by line into one record per language.
     *
     * The first line (column headers) is skipped. Lines that do not provide all
     * expected columns - such as the empty string that follows a trailing line
     * break - are skipped as well, so no record with missing fields is produced.
     *
     * @param Client  $client  Browser client whose current page source holds the table.
     * @param Crawler $crawler Unused here; only the raw page source is read.
     */
    public static function toDataSet(Client $client, Crawler $crawler): array
    {
        $dataSet = [];
        foreach (explode("\n", $client->getPageSource()) as $lineNumber => $line) {
            if ($lineNumber === 0) {
                continue;
            }

            // Strip a trailing carriage return so CRLF input does not leak "\r" into the last column.
            $columns = explode("\t", rtrim($line, "\r"));
            if (count($columns) < self::COLUMN_COUNT) {
                // Incomplete row (blank or truncated line): reading it would hit undefined offsets.
                continue;
            }

            $record = (object) [
                'Id' => $columns[0],
                'Part2B' => $columns[1],
                'Part2T' => $columns[2],
                'Part1' => $columns[3],
                'Scope' => $columns[4],
                'LanguageType' => $columns[5],
                'RefName' => $columns[6],
                'Comment' => $columns[7],
            ];

            /** @var TDataSet $record */
            $dataSet[] = $record;
        }

        return $dataSet;
    }

    /**
     * Builds the enum file for LanguageAlpha3Extensive: every record becomes a case
     * named after its reference name and backed by its identifier.
     *
     * Records whose reference name is listed in SKIPPED_REF_NAMES are not added.
     * NOTE(review): the reason for skipping these two names is not visible in this
     * class - confirm before extending or removing the list.
     */
    public static function toEnumMapping(array $dataSet): array
    {
        $languageExtensive = new EnumFile(LanguageAlpha3Extensive::class);
        foreach ($dataSet as $dataItem) {
            if (in_array($dataItem->RefName, self::SKIPPED_REF_NAMES, true)) {
                continue;
            }

            $languageExtensive->addCase(new EnumCase($dataItem->RefName, $dataItem->Id));
        }

        return [$languageExtensive];
    }

    /**
     * Sorting strategy applied to the generated enum cases.
     */
    public static function getSorting(): SortingInterface
    {
        return new ValueWithDeprecatedTagsSeparateSorting();
    }
}
2 changes: 1 addition & 1 deletion dev/DataTarget/NameNormalizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static function normalize(string $key): string
throw new TransliterationException();
}

$key = str_replace([' ', ';', ',', '(', ')', '-', '.', '\'', '*', '[', ']'], '_', $key);
$key = str_replace([' ', ';', ',', '(', ')', '-', '.', '\'', '/', '|', '=', '!', '?', '*', '[', ']', '~'], '_', $key);

return trim(str_replace(['__', '__'], ['_', '_'], $key), '_');

This comment has been minimized.

Copy link
@szepeviktor

szepeviktor Oct 28, 2023

Contributor

@PrinsFrank Does str_replace here replace double underscore with single twice?

}
Expand Down
Loading

0 comments on commit f6a4bbd

Please sign in to comment.