diff --git a/ext/js/language/grc/ancient-greek-processors.js b/ext/js/language/grc/ancient-greek-processors.js
new file mode 100644
index 0000000000..34e688cbfe
--- /dev/null
+++ b/ext/js/language/grc/ancient-greek-processors.js
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2025 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {basicTextProcessorOptions, removeAlphabeticDiacritics} from '../text-processors.js';
+
+/**
+ * Greek equivalents of the Latin letters and digraphs that latinToGreek converts.
+ * Latin letters without an entry (c, f, h, j, q, v, w, y) are left unchanged.
+ * @type {Map<string, string>}
+ */
+const greekByLatin = new Map([
+    ['th', 'θ'],
+    ['ph', 'φ'],
+    ['ch', 'χ'],
+    ['ps', 'ψ'],
+    ['Th', 'Θ'],
+    ['Ph', 'Φ'],
+    ['Ch', 'Χ'],
+    ['Ps', 'Ψ'],
+    ['TH', 'Θ'],
+    ['PH', 'Φ'],
+    ['CH', 'Χ'],
+    ['PS', 'Ψ'],
+    ['a', 'α'],
+    ['b', 'β'],
+    ['g', 'γ'],
+    ['d', 'δ'],
+    ['e', 'ε'],
+    ['z', 'ζ'],
+    ['i', 'ι'],
+    ['k', 'κ'],
+    ['l', 'λ'],
+    ['m', 'μ'],
+    ['n', 'ν'],
+    ['x', 'ξ'],
+    ['o', 'ο'],
+    ['p', 'π'],
+    ['r', 'ρ'],
+    ['s', 'σ'],
+    ['t', 'τ'],
+    ['u', 'υ'],
+    ['A', 'Α'],
+    ['B', 'Β'],
+    ['G', 'Γ'],
+    ['D', 'Δ'],
+    ['E', 'Ε'],
+    ['Z', 'Ζ'],
+    ['I', 'Ι'],
+    ['K', 'Κ'],
+    ['L', 'Λ'],
+    ['M', 'Μ'],
+    ['N', 'Ν'],
+    ['X', 'Ξ'],
+    ['O', 'Ο'],
+    ['P', 'Π'],
+    ['R', 'Ρ'],
+    ['S', 'Σ'],
+    ['T', 'Τ'],
+    ['U', 'Υ'],
+]);
+
+/**
+ * Long vowels, keyed by the Latin base letter that carries the macron (ē → η, ō → ω).
+ * @type {Map<string, string>}
+ */
+const longVowelByBase = new Map([
+    ['e', 'η'],
+    ['o', 'ω'],
+    ['E', 'Η'],
+    ['O', 'Ω'],
+]);
+
+// In NFD the macron (U+0304) follows its base letter, possibly after other combining marks.
+const longVowelPattern = /([eoEO])[\u0300-\u036f]*\u0304/g;
+
+// Digraphs are listed before single letters so that "th" is never read as "t" followed by "h".
+const latinTokenPattern = /[TtPpCc]h|[TPC]H|[Pp]s|PS|[A-Za-z]/g;
+
+// A sigma is word-final when no letter or combining mark comes directly after it.
+const wordFinalSigmaPattern = /σ(?![\p{L}\p{M}])/gu;
+
+/** @type {import('language').TextProcessor} */
+export const convertLatinToGreek = {
+    name: 'Convert latin characters to greek',
+    description: 'a → α, A → Α, b → β, B → Β, etc.',
+    options: basicTextProcessorOptions,
+    process: (str, setting) => {
+        return setting ? latinToGreek(str) : str;
+    },
+};
+
+/**
+ * Transliterates Latin-script text into Greek letters.
+ *
+ * Long vowels written with a macron (ē, ō) become η and ω; every other diacritic is
+ * dropped. The digraphs th, ph, ch and ps become θ, φ, χ and ψ, and a lowercase sigma
+ * that ends a word is written as ς.
+ * @param {string} latin
+ * @returns {string}
+ */
+export function latinToGreek(latin) {
+    // Diacritic removal would also drop the macron that tells ē/ō (η/ω) apart from
+    // e/o (ε/ο), so long vowels are resolved before it runs.
+    const withLongVowels = latin.normalize('NFD').replace(longVowelPattern, (match, base) => {
+        const longVowel = longVowelByBase.get(base);
+        return typeof longVowel === 'undefined' ? match : longVowel;
+    });
+    const withoutDiacritics = removeAlphabeticDiacritics.process(withLongVowels, true);
+
+    const greek = withoutDiacritics.replace(latinTokenPattern, (token) => {
+        const replacement = greekByLatin.get(token);
+        return typeof replacement === 'undefined' ? token : replacement;
+    });
+
+    return greek.replace(wordFinalSigmaPattern, 'ς');
+}
diff --git a/ext/js/language/grc/ancient-greek-transforms.js b/ext/js/language/grc/ancient-greek-transforms.js
new file mode 100644
index 0000000000..c9ccb99ff0
--- /dev/null
+++ b/ext/js/language/grc/ancient-greek-transforms.js
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2025 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {suffixInflection} from '../language-transforms.js';
+
+const conditions = { // Parts of speech used by the rules below; each is flagged as a dictionary form.
+    v: {
+        name: 'Verb',
+        isDictionaryForm: true,
+    },
+    n: {
+        name: 'Noun',
+        isDictionaryForm: true,
+    },
+    adj: {
+        name: 'Adjective',
+        isDictionaryForm: true,
+    },
+};
+
+/** @type {import('language-transformer').LanguageTransformDescriptor} */
+export const ancientGreekTransforms = {
+    language: 'grc',
+    conditions,
+    transforms: {
+        // Inflections: every rule swaps an inflected ending for the ending of the dictionary form.
+        // Verbs: -ει is traced back both to a plain -ω verb and to a contract verb in -εω.
+        '3rd person singular present active indicative': {
+            name: '3rd person singular present active indicative',
+            rules: [
+                suffixInflection('ει', 'ω', [], ['v']),
+                suffixInflection('ει', 'εω', [], ['v']),
+            ],
+        },
+
+        // Nouns: only the paradigm of nouns whose dictionary form ends in -ος is covered.
+        'accusative singular': {
+            name: 'accusative singular',
+            rules: [
+                suffixInflection('ον', 'ος', [], ['n']),
+            ],
+        },
+        'genitive singular': {
+            name: 'genitive singular',
+            rules: [
+                suffixInflection('ου', 'ος', [], ['n']),
+            ],
+        },
+        'dative singular': {
+            name: 'dative singular',
+            rules: [
+                suffixInflection('ω', 'ος', [], ['n']),
+            ],
+        },
+        'vocative singular': {
+            name: 'vocative singular',
+            rules: [
+                suffixInflection('ε', 'ος', [], ['n']),
+            ],
+        },
+        'nominative plural': {
+            name: 'nominative plural',
+            rules: [
+                suffixInflection('οι', 'ος', [], ['n']),
+            ],
+        },
+        'genitive plural': {
+            name: 'genitive plural',
+            rules: [
+                suffixInflection('ων', 'ος', [], ['n']),
+            ],
+        },
+        'dative plural': {
+            name: 'dative plural',
+            rules: [
+                suffixInflection('οις', 'ος', [], ['n']),
+            ],
+        },
+        'accusative plural': {
+            name: 'accusative plural',
+            rules: [
+                suffixInflection('ους', 'ος', [], ['n']),
+            ],
+        },
+        'vocative plural': {
+            name: 'vocative plural',
+            rules: [
+                suffixInflection('οι', 'ος', [], ['n']), // same ending as the nominative plural
+            ],
+        },
+        // Adjectives: only the masculine accusative singular of adjectives in -ος is covered.
+        'accusative singular masculine': {
+            name: 'accusative singular masculine',
+            rules: [
+                suffixInflection('ον', 'ος', [], ['adj']),
+            ],
+        },
+        // Word formation: traces a noun in -ος back to a verb in -εω (e.g. γεωργος → γεωργεω).
+        'nominalization': {
+            name: 'nominalization',
+            rules: [
+                suffixInflection('ος', 'εω', [], ['v']),
+            ],
+        },
+    },
+};
diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js
index 79d12f5ea8..164a2b8564 100644
--- a/ext/js/language/language-descriptors.js
+++ b/ext/js/language/language-descriptors.js
@@ -25,6 +25,8 @@ import {esperantoTransforms} from './eo/esperanto-transforms.js';
 import {spanishTransforms} from './es/spanish-transforms.js';
 import {apostropheVariants} from './fr/french-text-preprocessors.js';
 import {frenchTransforms} from './fr/french-transforms.js';
+import {convertLatinToGreek} from './grc/ancient-greek-processors.js';
+import {ancientGreekTransforms} from './grc/ancient-greek-transforms.js';
 import {
     alphabeticToHiragana,
     alphanumericWidthVariants,
@@ -177,7 +179,9 @@ const languageDescriptors = [
         textPreprocessors: {
             ...capitalizationPreprocessors,
             removeAlphabeticDiacritics,
+            convertLatinToGreek,
         },
+        languageTransforms: ancientGreekTransforms,
     },
     {
         iso: 'hi',
diff --git a/test/language/ancient-greek-processors.test.js b/test/language/ancient-greek-processors.test.js
new file mode 100644
index 0000000000..d76e942348
--- /dev/null
+++ b/test/language/ancient-greek-processors.test.js
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2025 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {describe, expect, test} from 'vitest';
+import {latinToGreek} from '../../ext/js/language/grc/ancient-greek-processors.js';
+
+const testCases = [
+    ['Zeus', 'Ζευς'], // single letters plus sigma at the end of the input
+    ['Philippos', 'Φιλιππος'], // capitalised digraph "Ph" plus sigma at the end of the input
+];
+
+describe('latin to greek conversion', () => {
+    test.each(testCases)('%s converts to %s', (input, expected) => {
+        expect(latinToGreek(input)).toStrictEqual(expected);
+    });
+});
diff --git a/test/language/ancient-greek-transforms.test.js b/test/language/ancient-greek-transforms.test.js
new file mode 100644
index 0000000000..ecc8add44a
--- /dev/null
+++ b/test/language/ancient-greek-transforms.test.js
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2025 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {ancientGreekTransforms} from '../../ext/js/language/grc/ancient-greek-transforms.js';
+import {LanguageTransformer} from '../../ext/js/language/language-transformer.js';
+import {testLanguageTransformer} from '../fixtures/language-transformer-test.js';
+
+/* eslint-disable @stylistic/no-multi-spaces */
+const tests = [
+    {
+        category: 'verbs',
+        valid: true,
+        tests: [
+            {term: 'λύω', source: 'λύει', rule: 'v', reasons: ['3rd person singular present active indicative']},
+            {term: 'φιλεω', source: 'φιλει', rule: 'v', reasons: ['3rd person singular present active indicative']}, // contract verb: -ει traced back to -εω
+            {term: 'γεωργεω', source: 'γεωργος', rule: 'v', reasons: ['nominalization']}, // noun in -ος traced back to the -εω verb
+        ],
+    },
+    {
+        category: 'nouns',
+        valid: true,
+        tests: [
+            {term: 'ανθρωπος', source: 'ανθρωπον', rule: 'n', reasons: ['accusative singular']},
+            {term: 'ανθρωπος', source: 'ανθρωπου', rule: 'n', reasons: ['genitive singular']},
+            {term: 'ανθρωπος', source: 'ανθρωπε', rule: 'n', reasons: ['vocative singular']},
+            {term: 'ανθρωπος', source: 'ανθρωπω', rule: 'n', reasons: ['dative singular']},
+            {term: 'ανθρωπος', source: 'ανθρωποι', rule: 'n', reasons: ['nominative plural']},
+            {term: 'ανθρωπος', source: 'ανθρωποις', rule: 'n', reasons: ['dative plural']},
+            {term: 'ανθρωπος', source: 'ανθρωπους', rule: 'n', reasons: ['accusative plural']},
+            {term: 'ανθρωπος', source: 'ανθρωπων', rule: 'n', reasons: ['genitive plural']},
+            {term: 'ανθρωπος', source: 'ανθρωποι', rule: 'n', reasons: ['vocative plural']}, // same surface form as the nominative plural case above
+        ],
+    },
+    {
+        category: 'adjectives',
+        valid: true,
+        tests: [
+            {term: 'καλος', source: 'καλον', rule: 'adj', reasons: ['accusative singular masculine']},
+        ],
+    },
+];
+/* eslint-enable @stylistic/no-multi-spaces */
+
+const languageTransformer = new LanguageTransformer();
+languageTransformer.addDescriptor(ancientGreekTransforms);
+
+testLanguageTransformer(languageTransformer, tests);
diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts
index 
70cedb3e0d..b39bb82047 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -121,7 +121,9 @@ type AllTextProcessors = { }; }; grc: { - pre: CapitalizationPreprocessors & AlphabeticDiacriticsProcessor; + pre: CapitalizationPreprocessors & AlphabeticDiacriticsProcessor & { + convertLatinToGreek: TextProcessor; + }; }; hi: Record; hu: {