Skip to content

Commit

Permalink
[grc] basic deinflection (#1818)
Browse files Browse the repository at this point in the history
* [grc] basic deinflections

* fix copyright years

* delete comment

* more greek features

* skipped file

* lint

* improve latin conversion

* lint
  • Loading branch information
StefanVukovic99 authored Feb 24, 2025
1 parent 0357e8f commit 101eca0
Show file tree
Hide file tree
Showing 6 changed files with 324 additions and 1 deletion.
106 changes: 106 additions & 0 deletions ext/js/language/grc/ancient-greek-processors.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {basicTextProcessorOptions, removeAlphabeticDiacritics} from '../text-processors.js';

/** @type {import('language').TextProcessor<boolean>} */
export const convertLatinToGreek = {
name: 'Convert latin characters to greek',
description: 'a → α, A → Α, b → β, B → Β, etc.',
options: basicTextProcessorOptions,
process: (str, setting) => {
return setting ? latinToGreek(str) : str;
},
};

/**
* @param {string} latin
* @returns {string}
*/
export function latinToGreek(latin) {
latin = removeAlphabeticDiacritics.process(latin, true);

const singleMap = {
a: 'α',
b: 'β',
g: 'γ',
d: 'δ',
e: 'ε',
z: 'ζ',
ē: 'η',
i: 'ι',
k: 'κ',
l: 'λ',
m: 'μ',
n: 'ν',
x: 'ξ',
o: 'ο',
p: 'π',
r: 'ρ',
s: 'σ',
t: 'τ',
u: 'υ',
ō: 'ω',
A: 'Α',
B: 'Β',
G: 'Γ',
D: 'Δ',
E: 'Ε',
Z: 'Ζ',
Ē: 'Η',
I: 'Ι',
K: 'Κ',
L: 'Λ',
M: 'Μ',
N: 'Ν',
X: 'Ξ',
O: 'Ο',
P: 'Π',
R: 'Ρ',
S: 'Σ',
T: 'Τ',
U: 'Υ',
Ō: 'Ω',
};

const doubleMap = {
th: 'θ',
ph: 'φ',
ch: 'χ',
ps: 'ψ',
Th: 'Θ',
Ph: 'Φ',
Ch: 'Χ',
Ps: 'Ψ',
};

let result = latin;

for (const [double, greek] of Object.entries(doubleMap)) {
result = result.replace(new RegExp(double, 'g'), greek);
}

// Handle basic character replacements
for (const [single, greek] of Object.entries(singleMap)) {
result = result.replace(new RegExp(single, 'g'), greek);
}

// Handle final sigma
result = result.replace(/σ$/, 'ς');

return result;
}
120 changes: 120 additions & 0 deletions ext/js/language/grc/ancient-greek-transforms.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {suffixInflection} from '../language-transforms.js';

// Conditions (parts of speech) that the deinflection rules in this file can assign to a candidate term.
// Each of them is flagged as a dictionary form.
const conditions = {
v: {
name: 'Verb',
isDictionaryForm: true,
},
n: {
name: 'Noun',
isDictionaryForm: true,
},
adj: {
name: 'Adjective',
isDictionaryForm: true,
},
};

/** @type {import('language-transformer').LanguageTransformDescriptor<keyof typeof conditions>} */
export const ancientGreekTransforms = {
language: 'grc',
conditions,
transforms: {
// inflections
// verbs
'3rd person singular present active indicative': {
name: '3rd person singular present active indicative',
rules: [
suffixInflection('ει', 'ω', [], ['v']),
suffixInflection('ει', 'εω', [], ['v']),
],
},

// nouns
'accusative singular': {
name: 'accusative singular',
rules: [
suffixInflection('ον', 'ος', [], ['n']),
],
},
'genitive singular': {
name: 'genitive singular',
rules: [
suffixInflection('ου', 'ος', [], ['n']),
],
},
'dative singular': {
name: 'dative singular',
rules: [
suffixInflection('ω', 'ος', [], ['n']),
],
},
'vocative singular': {
name: 'vocative singular',
rules: [
suffixInflection('ε', 'ος', [], ['n']),
],
},
'nominative plural': {
name: 'nominative plural',
rules: [
suffixInflection('οι', 'ος', [], ['n']),
],
},
'genitive plural': {
name: 'genitive plural',
rules: [
suffixInflection('ων', 'ος', [], ['n']),
],
},
'dative plural': {
name: 'dative plural',
rules: [
suffixInflection('οις', 'ος', [], ['n']),
],
},
'accusative plural': {
name: 'accusative plural',
rules: [
suffixInflection('ους', 'ος', [], ['n']),
],
},
'vocative plural': {
name: 'vocative plural',
rules: [
suffixInflection('οι', 'ος', [], ['n']),
],
},
// adjectives
'accusative singular masculine': {
name: 'accusative singular masculine',
rules: [
suffixInflection('ον', 'ος', [], ['adj']),
],
},
// word formation
'nominalization': {
name: 'nominalization',
rules: [
suffixInflection('ος', 'εω', [], ['v']),
],
},
},
};
4 changes: 4 additions & 0 deletions ext/js/language/language-descriptors.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import {esperantoTransforms} from './eo/esperanto-transforms.js';
import {spanishTransforms} from './es/spanish-transforms.js';
import {apostropheVariants} from './fr/french-text-preprocessors.js';
import {frenchTransforms} from './fr/french-transforms.js';
import {convertLatinToGreek} from './grc/ancient-greek-processors.js';
import {ancientGreekTransforms} from './grc/ancient-greek-transforms.js';
import {
alphabeticToHiragana,
alphanumericWidthVariants,
Expand Down Expand Up @@ -177,7 +179,9 @@ const languageDescriptors = [
textPreprocessors: {
...capitalizationPreprocessors,
removeAlphabeticDiacritics,
convertLatinToGreek,
},
languageTransforms: ancientGreekTransforms,
},
{
iso: 'hi',
Expand Down
30 changes: 30 additions & 0 deletions test/language/ancient-greek-processors.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {describe, expect, test} from 'vitest';
import {latinToGreek} from '../../ext/js/language/grc/ancient-greek-processors.js';


// Pairs of [Latin-script input, expected Greek output] for latinToGreek.
const testCases = [
    // Capital letter and word-final sigma.
    ['Zeus', 'Ζευς'],
    // Digraphs: th, ch, ps and capitalised Ph.
    ['theos', 'θεος'],
    ['chronos', 'χρονος'],
    ['psalmos', 'ψαλμος'],
    ['Philippos', 'Φιλιππος'],
];

// The suite exercises Latin-to-Greek transliteration, so it is named accordingly.
describe('latin to greek conversion', () => {
    test.each(testCases)('%s converts to %s', (input, expected) => {
        expect(latinToGreek(input)).toStrictEqual(expected);
    });
});
61 changes: 61 additions & 0 deletions test/language/ancient-greek-transforms.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {ancientGreekTransforms} from '../../ext/js/language/grc/ancient-greek-transforms.js';
import {LanguageTransformer} from '../../ext/js/language/language-transformer.js';
import {testLanguageTransformer} from '../fixtures/language-transformer-test.js';

/* eslint-disable @stylistic/no-multi-spaces */
// Fixture for the Ancient Greek deinflection rules, grouped by part of speech.
// Each entry pairs an inflected `source` with the dictionary `term` it should resolve to,
// the condition (`rule`) of that term, and the transform names (`reasons`) applied.
// All groups are marked `valid: true`.
const tests = [
{
category: 'verbs',
valid: true,
tests: [
{term: 'λύω', source: 'λύει', rule: 'v', reasons: ['3rd person singular present active indicative']},
{term: 'φιλεω', source: 'φιλει', rule: 'v', reasons: ['3rd person singular present active indicative']},
{term: 'γεωργεω', source: 'γεωργος', rule: 'v', reasons: ['nominalization']},
],
},
{
category: 'nouns',
valid: true,
tests: [
{term: 'ανθρωπος', source: 'ανθρωπον', rule: 'n', reasons: ['accusative singular']},
{term: 'ανθρωπος', source: 'ανθρωπου', rule: 'n', reasons: ['genitive singular']},
{term: 'ανθρωπος', source: 'ανθρωπε', rule: 'n', reasons: ['vocative singular']},
{term: 'ανθρωπος', source: 'ανθρωπω', rule: 'n', reasons: ['dative singular']},
{term: 'ανθρωπος', source: 'ανθρωποι', rule: 'n', reasons: ['nominative plural']},
{term: 'ανθρωπος', source: 'ανθρωποις', rule: 'n', reasons: ['dative plural']},
{term: 'ανθρωπος', source: 'ανθρωπους', rule: 'n', reasons: ['accusative plural']},
{term: 'ανθρωπος', source: 'ανθρωπων', rule: 'n', reasons: ['genitive plural']},
// Same source form as the nominative plural entry; only the expected reason differs.
{term: 'ανθρωπος', source: 'ανθρωποι', rule: 'n', reasons: ['vocative plural']},
],
},
{
category: 'adjectives',
valid: true,
tests: [
{term: 'καλος', source: 'καλον', rule: 'adj', reasons: ['accusative singular masculine']},
],
},
];
/* eslint-enable @stylistic/no-multi-spaces */

// Build a transformer that knows only the Ancient Greek descriptor and run the shared fixture harness on it.
const languageTransformer = new LanguageTransformer();
languageTransformer.addDescriptor(ancientGreekTransforms);

testLanguageTransformer(languageTransformer, tests);
4 changes: 3 additions & 1 deletion types/ext/language-descriptors.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,9 @@ type AllTextProcessors = {
};
};
grc: {
pre: CapitalizationPreprocessors & AlphabeticDiacriticsProcessor;
pre: CapitalizationPreprocessors & AlphabeticDiacriticsProcessor & {
convertLatinToGreek: TextProcessor<boolean>;
};
};
hi: Record<string, never>;
hu: {
Expand Down

0 comments on commit 101eca0

Please sign in to comment.