Skip to content

Commit

Permalink
fix missing meanings caused by multiple etymologies (#170)
Browse files Browse the repository at this point in the history
  • Loading branch information
StefanVukovic99 authored Nov 19, 2024
1 parent 351fa0d commit 2f1fdf6
Show file tree
Hide file tree
Showing 22 changed files with 3,518 additions and 3,181 deletions.
18 changes: 10 additions & 8 deletions 3-tidy-up.js
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ lr.on('line', (line) => {
* @param {KaikkiLine} parsedLine
*/
function handleLine(parsedLine) {
const { pos, sounds, forms } = parsedLine;
const { pos, sounds, forms, etymology_number = 0 } = parsedLine;
if(!pos) return;
const word = getCanonicalWordForm(parsedLine);
if (!word) return;
Expand Down Expand Up @@ -177,16 +177,16 @@ function handleLine(parsedLine) {
if (sensesWithoutInflectionGlosses.length === 0) return;

const readings = getReadings(word, parsedLine);
initializeWordResult(word, readings, pos);
initializeWordResult(word, readings, pos, String(etymology_number));

for (const ipaObj of ipa) {
saveIpaResult(word, readings, pos, ipaObj);
saveIpaResult(word, readings, pos, String(etymology_number), ipaObj);
}

const glossTree = getGlossTree(sensesWithoutInflectionGlosses);

for (const reading of readings) {
lemmaDict[word][reading][pos].glossTree = glossTree;
lemmaDict[word][reading][pos][String(etymology_number)].glossTree = glossTree;
}
}

Expand Down Expand Up @@ -273,11 +273,12 @@ function processForms(forms, word, pos) {
* @param {string} word
* @param {string[]} readings
* @param {string} pos
* @param {string} etymology_number
* @param {IpaInfo} ipaObj
*/
function saveIpaResult(word, readings, pos, ipaObj) {
function saveIpaResult(word, readings, pos, etymology_number, ipaObj) {
for (const reading of readings) {
const result = lemmaDict[word][reading][pos];
const result = lemmaDict[word][reading][pos][etymology_number];
const existingIpa = result.ipa.find(obj => obj.ipa === ipaObj.ipa);
if (!existingIpa) {
result.ipa.push(ipaObj);
Expand All @@ -291,10 +292,11 @@ function saveIpaResult(word, readings, pos, ipaObj) {
* @param {string} word
* @param {string[]} readings
* @param {string} pos
* @param {string} etymology_number
*/
function initializeWordResult(word, readings, pos) {
function initializeWordResult(word, readings, pos, etymology_number) {
for (const reading of readings) {
const result = ensureNestedObject(lemmaDict, [word, reading, pos]);
const result = ensureNestedObject(lemmaDict, [word, reading, pos, etymology_number]);
result.ipa ??= [];
result.glossTree ??= new Map();
}
Expand Down
86 changes: 44 additions & 42 deletions 4-make-yomitan.js
Original file line number Diff line number Diff line change
Expand Up @@ -271,50 +271,52 @@ let lastTermBankIndex = 0;

const ipa = [];

for (const [pos, info] of Object.entries(partsOfSpeechOfWord)) {
const foundPos = findPartOfSpeech(pos, partsOfSpeech, skippedPartsOfSpeech);
const {glossTree} = info;

const lemmaTags = [pos];
ipa.push(...info.ipa);

/** @type {Object<string, import('types').TermBank.TermInformation>} */
const entries = {};

for (const [gloss, branches] of glossTree.entries()) {
const tags = branches.get('_tags') || [];
branches.delete('_tags');

const senseTags = [...tags, ...lemmaTags];

/** @type {GlossBranch} */
const syntheticBranch = new Map();
syntheticBranch.set(gloss, branches);
const {glosses, recognizedTags} = handleNest(syntheticBranch, senseTags, pos);
const joinedTags = recognizedTags.join(' ');

if(!glosses || !glosses.length) continue;

if (entries[joinedTags]) {
// entries[joinedTags][5].push(gloss);
entries[joinedTags][5].push(...glosses);
} else {
entries[joinedTags] = [
term, // term
reading !== normalizedLemma ? reading : '', // reading
joinedTags, // definition_tags
foundPos, // rules
0, // frequency
glosses, // definitions
0, // sequence
'', // term_tags
];
for (const [pos, etyms] of Object.entries(partsOfSpeechOfWord)) {
for (const [etym_number, info] of Object.entries(etyms)) {
const foundPos = findPartOfSpeech(pos, partsOfSpeech, skippedPartsOfSpeech);
const {glossTree} = info;

const lemmaTags = [pos];
ipa.push(...info.ipa);

/** @type {Object<string, import('types').TermBank.TermInformation>} */
const entries = {};

for (const [gloss, branches] of glossTree.entries()) {
const tags = branches.get('_tags') || [];
branches.delete('_tags');

const senseTags = [...tags, ...lemmaTags];

/** @type {GlossBranch} */
const syntheticBranch = new Map();
syntheticBranch.set(gloss, branches);
const {glosses, recognizedTags} = handleNest(syntheticBranch, senseTags, pos);
const joinedTags = recognizedTags.join(' ');

if(!glosses || !glosses.length) continue;

if (entries[joinedTags]) {
// entries[joinedTags][5].push(gloss);
entries[joinedTags][5].push(...glosses);
} else {
entries[joinedTags] = [
term, // term
reading !== normalizedLemma ? reading : '', // reading
joinedTags, // definition_tags
foundPos, // rules
0, // frequency
glosses, // definitions
0, // sequence
'', // term_tags
];
}
}
}

debug(entries);
for (const [tags, entry] of Object.entries(entries)) {
ymtLemmas.push(entry);
debug(entries);
for (const [tags, entry] of Object.entries(entries)) {
ymtLemmas.push(entry);
}
}
}

Expand Down
14 changes: 14 additions & 0 deletions data/test/dict/de/en/tag_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,19 @@
-1,
"preposition",
1
],
[
"fem",
"",
-1,
"feminine",
1
],
[
"chem",
"",
0,
"chemistry",
0
]
]
66 changes: 66 additions & 0 deletions data/test/dict/de/en/term_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -1122,5 +1122,71 @@
],
0,
""
],
[
"Base",
"",
"arch fem n",
"n",
0,
[
{
"type": "structured-content",
"content": [
{
"tag": "div",
"content": [
"A female cousin."
]
}
]
}
],
0,
""
],
[
"Base",
"",
"fem obs n",
"n",
0,
[
{
"type": "structured-content",
"content": [
{
"tag": "div",
"content": [
"paternal aunt"
]
}
]
}
],
0,
""
],
[
"Base",
"",
"fem n chem",
"n",
0,
[
{
"type": "structured-content",
"content": [
{
"tag": "div",
"content": [
"base (compound that will neutralize an acid)"
]
}
]
}
],
0,
""
]
]
49 changes: 49 additions & 0 deletions data/test/dict/de/en/term_bank_2.json
Original file line number Diff line number Diff line change
Expand Up @@ -2317,5 +2317,54 @@
],
0,
""
],
[
"Basen",
"",
"non-lemma",
"",
0,
[
[
"Base",
[
"plural"
]
],
[
"Base",
[
"accusative",
"plural",
"definite"
]
],
[
"Base",
[
"dative",
"plural",
"definite"
]
],
[
"Base",
[
"genitive",
"plural",
"definite"
]
],
[
"Base",
[
"nominative",
"plural",
"definite"
]
]
],
0,
""
]
]
13 changes: 13 additions & 0 deletions data/test/ipa/de/en/term_meta_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,18 @@
}
]
}
],
[
"Base",
"ipa",
{
"reading": "Base",
"transcriptions": [
{
"ipa": "/ˈbaːzə/",
"tags": []
}
]
}
]
]
Loading

0 comments on commit 2f1fdf6

Please sign in to comment.