From d0a71039d7683edacb5ee5ca2cc4bc785e014f2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Vukovi=C4=87?= <stefanvukovic44@gmail.com>
Date: Tue, 15 Oct 2024 12:21:19 +0200
Subject: [PATCH] start adding types (#152)

* baseline

* early exit on senses

* refactor

* wip

* wip

* wip

* start adding types
---
 .gitignore                                |   2 +
 .vscode/settings.json                     |   3 +
 3-tidy-up.js                              | 245 ++++++++++++++++------
 4-make-yomitan.js                         |   1 +
 data/test/dict/sq/en/tag_bank_1.json      |   7 +
 data/test/dict/sq/en/term_bank_1.json     | 169 +++++++++++++++
 data/test/dict/sq/en/term_bank_2.json     | 197 +++++++++++++++++
 data/test/ipa/sq/en/tag_bank_1.json       |  10 +-
 data/test/ipa/sq/en/term_meta_bank_1.json |  46 ++++
 data/test/kaikki/sq-en.json               |   3 +-
 data/test/tidy/sq-en-forms-0.json         | 126 +++++++++++
 data/test/tidy/sq-en-lemmas.json          | 219 +++++++++++++++++++
 jsconfig.json                             |  21 ++
 types/types.ts                            | 106 ++++++++++
 util/util.js                              |  13 +-
 15 files changed, 1101 insertions(+), 67 deletions(-)
 create mode 100644 .vscode/settings.json
 create mode 100644 jsconfig.json
 create mode 100644 types/types.ts

diff --git a/.gitignore b/.gitignore
index aef1c85..e177713 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 *.json
 *.jsonl
+!jsconfig.json
+!.vscode/settings.json
 !tag_bank_term.json
 !tag_bank_ipa.json
 !parts_of_speech.json
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..be944f5
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "javascript.validate.enable": true
+}
\ No newline at end of file
diff --git a/3-tidy-up.js b/3-tidy-up.js
index 23c52cf..06fed75 100644
--- a/3-tidy-up.js
+++ b/3-tidy-up.js
@@ -7,20 +7,34 @@ const {
     target_iso: targetIso,
     kaikki_file: kaikkiFile,
     tidy_folder: writeFolder
-} = process.env;
+} = /** @type {TidyEnv} */ (process.env);
 
 const { sortTags, similarSort, mergePersonTags, consoleOverwrite, clearConsoleLine, logProgress, mapJsonReplacer } = require('./util/util');
 
+/** @type {LemmaDict} */
 const lemmaDict = {};
+
+/** @type {FormsMap} */
 const formsMap = new Map();
+
+/** @type {AutomatedForms} */
 const automatedForms = new Map();
 
+/**
+ * @param {string} string - text in which every RegExp metacharacter gets a backslash prefix
+ * @returns {string} the escaped text, usable as a literal inside a RegExp pattern
+ */
 function escapeRegExp(string) {
     return string.replace(/[.*+\-?^${}()|[\]\\]/g, '\\$&');
 }
 
+/**
+ * @param {string[]} glosses 
+ * @param {FormOf[]|undefined} formOf 
+ * @returns {boolean}
+ */
 function isInflectionGloss(glosses, formOf) {
-    glossesString = JSON.stringify(glosses);
+    const glossesString = JSON.stringify(glosses);
     switch (targetIso) {
         case 'de':
             if (glosses.some(gloss => /des (?:Verbs|Adjektivs|Substantivs|Demonstrativpronomens|Possessivpronomens|Pronomens)/.test(gloss))) return true;
@@ -28,6 +42,7 @@ function isInflectionGloss(glosses, formOf) {
             if (glosses.some(gloss => /.*inflection of.*/.test(gloss))) return true;
             if(!Array.isArray(formOf)) return false;
             for (const {word: lemma} of formOf) {
+                if(!lemma) continue;
                 if (glosses.some(gloss => new RegExp(`of ${escapeRegExp(lemma)}$`).test(gloss))) return true;
             }
             
@@ -38,12 +53,16 @@ function isInflectionGloss(glosses, formOf) {
     return false;
 }
 
-
-function handleLevel(nest, level) {
+/**
+ * @param {GlossTree} glossTree
+ * @param {number} level
+ * @returns {*}
+ */
+function handleLevel(glossTree, level) {
     const nestDefs = [];
     let defIndex = 0;
 
-    for (const [def, children] of nest) {
+    for (const [def, children] of glossTree) {
         defIndex += 1;
 
         if(children.size > 0) {
@@ -65,6 +84,10 @@ function handleLevel(nest, level) {
     return nestDefs;
 }
 
+/**
+ * @param {GlossTree} glossTree
+ * @param {SenseInfo} sense
+ */
 function handleNest(glossTree, sense) {
     const nestedGloss = handleLevel(glossTree, 1);
 
@@ -74,15 +97,20 @@ function handleNest(glossTree, sense) {
         }
     }
 }
-
+/**
+ * @param {string} form 
+ * @param {string} pos 
+ * @param {string} lemma 
+ * @param {string[]|Set<string>} inflections 
+ */
 function addDeinflections(form, pos, lemma, inflections) {
     if (targetIso === 'fr') {
         form = form.replace(/(qu\')?(ils\/elles|il\/elle\/on)\s*/, '');
     }
 
-    const lemmaForms = formsMap.get(lemma) || new Map();
+    const lemmaForms = formsMap.get(lemma) || /** @type {Map<Form, Map<PoS, string[]>>} */ (new Map());
     formsMap.set(lemma, lemmaForms);
-    const formPOSs = lemmaForms.get(form) || new Map();
+    const formPOSs = lemmaForms.get(form) || /** @type {Map<PoS, string[]>} */ (new Map());
     lemmaForms.set(form, formPOSs);
     formPOSs.get(pos) || formPOSs.set(pos, []);
 
@@ -132,46 +160,26 @@ lr.on('line', (line) => {
     if (line) {
         lineCount += 1;
         logProgress("Processing lines", lineCount);
-        handleLine(line);
+        handleLine(JSON.parse(line));
     }
 });
 
-function handleLine(line) {
-    const parsedLine = JSON.parse(line);
+/**
+ * @param {KaikkiLine} parsedLine 
+ */
+function handleLine(parsedLine) {
     const { pos, sounds, forms } = parsedLine;
     if(!pos) return;
     const word = getCanonicalWordForm(parsedLine);
     if (!word) return;
-    const readings = getReadings(word, parsedLine);
     
-    if (forms) {
-        forms.forEach((formData) => {
-            const { form } = formData;
-            let { tags } = formData;
-            if(!form) return;
-            if(!tags) return;
-            if(form === '-') return;
-            tags = tags.filter(tag => !redundantTags.includes(tag));
-            const isBlacklisted = tags.some(value => blacklistedTags.includes(value));
-            if (isBlacklisted) return;
-            const isIdentity = !tags.some(value => !identityTags.includes(value));
-            if (isIdentity) return;
-
-            const wordMap = automatedForms.get(word) || new Map();
-            const formMap = wordMap.get(form) || new Map();
-            formMap.get(pos) || formMap.set(pos, new Set());
-            wordMap.set(form, formMap);
-            automatedForms.set(word, wordMap);
-            
-            const tagsSet = new Set((formMap.get(pos)));
-            
-            tagsSet.add(sortTags(targetIso, tags).join(' '));
-            
-            formMap.set(pos, similarSort(mergePersonTags(targetIso, Array.from(tagsSet))));                     
-        });
-    }
+    processForms(forms, word, pos);
+
+    const {senses} = parsedLine;
+    if (!senses) return;
     
-    const ipa = sounds 
+    /** @type {IpaInfo[]} */
+    const ipa = /** @type {IpaInfo[]} */ (sounds 
         ? sounds
             .filter(sound => sound && sound.ipa)
             .map(({ipa, tags, note}) => {
@@ -184,16 +192,14 @@ function handleLine(line) {
                 }
                 return ({ipa, tags})
             })
-            .flatMap(ipaObj => typeof ipaObj.ipa === 'string' ? [ipaObj] : ipaObj.ipa.map(ipa => ({ ipa, tags: ipaObj.tags })) )
-            .filter(ipaObj => ipaObj.ipa)
-        : [];
-
+            .flatMap(ipaObj => typeof ipaObj.ipa === 'string' ? [ipaObj] : ipaObj?.ipa?.map(ipa => ({ ipa, tags: ipaObj.tags })) )
+            .filter(ipaObj => ipaObj?.ipa)
+        : []);
     
-    const {senses} = parsedLine;
-    if (!senses) return;
-
-    const sensesWithGlosses = senses.filter(sense => sense.glosses || sense.raw_glosses || sense.raw_gloss);
-    sensesWithGlosses.map(sense => {
+    /** @type {TidySense[]} */
+    const sensesWithGlosses = /** @type {TidySense[]} */ (senses
+        .filter(sense => sense.glosses || sense.raw_glosses || sense.raw_gloss)
+        .map(sense => {
         const glosses = sense.raw_glosses || sense.raw_gloss || sense.glosses;
         const glossesArray = Array.isArray(glosses) ? glosses : [glosses];
 
@@ -202,9 +208,8 @@ function handleLine(line) {
             tags.push(...sense.raw_tags);
         }
 
-        sense.glossesArray = glossesArray;
-        sense.tags = tags;
-    });
+        return {...sense, glossesArray, tags};
+    }));
 
     const sensesWithoutInflectionGlosses = sensesWithGlosses.filter(sense => {
         const {glossesArray, form_of, glosses} = sense;
@@ -214,38 +219,44 @@ function handleLine(line) {
     });
 
     if (sensesWithoutInflectionGlosses.length === 0) return;
-        
+    
+    const readings = getReadings(word, parsedLine);
     initializeWordResult(word, readings, pos);
 
     for (const ipaObj of ipa) {
         saveIpaResult(word, readings, pos, ipaObj);
     }
 
+    /** @type {GlossTree} */
     const glossTree = new Map();
     for (const sense of sensesWithoutInflectionGlosses) {
         const { glossesArray, tags } = sense;
         let temp = glossTree;
         for (const [levelIndex, levelGloss] of glossesArray.entries()) {
-            if(!temp.get(levelGloss)) {
-                temp.set(levelGloss, new Map());
+            let curr = temp.get(levelGloss);
+            if(!curr) {
+                curr = new Map();
+                temp.set(levelGloss, curr);
                 if(levelIndex === 0) {
-                    temp.get(levelGloss).set('_tags', tags);
+                    curr.set('_tags', tags);
                 }
             } else if (levelIndex === 0) {
-                temp.get(levelGloss).set('_tags', tags.filter(value => temp.get(levelGloss).get('_tags').includes(value)));
+                curr.set('_tags', tags.filter(value => curr?.get('_tags')?.includes(value)));
             }
-            temp = temp.get(levelGloss);
+            temp = curr;
         }
     }
     
     for (const [gloss, children] of glossTree) {
-        const tags = children.get('_tags');
-        children.delete('_tags');
+        const tags = children.get('_tags') || [];
+        children.delete('_tags');   
 
+        /** @type {SenseInfo} */
         const currSense = { glosses: [], tags };
         if(children.size === 0) {
             currSense.glosses.push(gloss);
         } else {
+            /** @type {GlossTree} */
             const branch = new Map();
             branch.set(gloss, children);
             handleNest(branch, currSense);
@@ -257,12 +268,59 @@ function handleLine(line) {
     }
 }
 
+/**
+ * Records the tags of every form that passes the filters below in automatedForms (word -> form -> pos).
+ * @param {FormInfo[]|undefined} forms - nothing happens when this is undefined
+ * @param {string} word - outer key into automatedForms
+ * @param {string} pos - innermost key, stored under the form
+ */
+function processForms(forms, word, pos) {
+    if(!forms) return;
+    forms.forEach((formData) => {
+        const { form } = formData;
+        let { tags } = formData;
+        if (!form) return;
+        if (!tags) return;
+        if (form === '-') return;
+        tags = tags.filter(tag => !redundantTags.includes(tag));
+        const isBlacklisted = tags.some(value => blacklistedTags.includes(value));
+        if (isBlacklisted) return;
+        const isIdentity = !tags.some(value => !identityTags.includes(value)); // no tag outside identityTags (also true when none remain)
+        if (isIdentity) return;
+
+        /** @type {Map<Form, Map<PoS, string[]|Set<string>>>} */
+        const wordMap = automatedForms.get(word) || new Map();
+        /** @type {Map<string, Set<string>|string[]>} */
+        const formMap = wordMap.get(form) || new Map();
+        formMap.get(pos) || formMap.set(pos, new Set());
+        wordMap.set(form, formMap);
+        automatedForms.set(word, wordMap);
+
+        // Copy into a Set so a tag string already stored for this pos is not added a second time.
+        const tagsSet = new Set((formMap.get(pos)));
+        tagsSet.add(sortTags(targetIso, tags).join(' '));
+        formMap.set(pos, similarSort(mergePersonTags(targetIso, Array.from(tagsSet))));
+    });
+}
+
+/**
+ * @param {string} word - first-level key into lemmaDict
+ * @param {string[]} readings - second-level keys; the sense is appended under each of them
+ * @param {string} pos - third-level key
+ * @param {SenseInfo} currSense - pushed onto lemmaDict[word][reading][pos].senses (entry must already exist)
+ */
 function saveSenseResult(word, readings, pos, currSense) {
     for (const reading of readings) {
         lemmaDict[word][reading][pos].senses.push(currSense);
     }
 }
 
+/**
+ * @param {string} word 
+ * @param {string[]} readings 
+ * @param {string} pos 
+ * @param {IpaInfo} ipaObj 
+ */
 function saveIpaResult(word, readings, pos, ipaObj) {
     for (const reading of readings) {
         const result = lemmaDict[word][reading][pos];
@@ -272,6 +330,11 @@ function saveIpaResult(word, readings, pos, ipaObj) {
     }
 }
 
+/**
+ * @param {string} word 
+ * @param {string[]} readings 
+ * @param {string} pos 
+ */
 function initializeWordResult(word, readings, pos) {
     for (const reading of readings) {
         const result = ensureNestedObject(lemmaDict, [word, reading, pos]);
@@ -280,6 +343,12 @@ function initializeWordResult(word, readings, pos) {
     }
 }
 
+/**
+ * @param {Glosses|undefined} glosses
+ * @param {string} word 
+ * @param {string} pos 
+ * @returns 
+ */
 function processInflectionGlosses(glosses, word, pos) {
     switch (targetIso) {
         case 'de':
@@ -287,6 +356,10 @@ function processInflectionGlosses(glosses, word, pos) {
         case 'en':
             return processEnglishInflectionGlosses(glosses, word, pos);
         case 'fr':
+            if(!glosses) return;
+            /**
+             * @type {string|undefined}
+             */
             let inflection, lemma;
 
             const match1 = glosses[0].match(/(.*)du verbe\s+((?:(?!\bdu\b).)*)$/);
@@ -312,7 +385,14 @@ function processInflectionGlosses(glosses, word, pos) {
     }
 }
 
+/**
+ * @param {Glosses|undefined} glosses 
+ * @param {string} word 
+ * @param {string} pos 
+ * @returns 
+ */
 function processGermanInflectionGlosses(glosses, word, pos) {
+    if (!glosses || !Array.isArray(glosses)) return;
     const match1 = glosses[0].match(/(.*)des (?:Verbs|Adjektivs|Substantivs|Demonstrativpronomens|Possessivpronomens|Pronomens) (.*)$/);
     if (!match1 || match1.length < 3) return;
     const inflection = match1[1].trim();
@@ -322,6 +402,11 @@ function processGermanInflectionGlosses(glosses, word, pos) {
     }
 }
 
+/**
+ * @param {NestedObject} obj
+ * @param {string[]} keys 
+ * @returns {NestedObject}
+ */
 function ensureNestedObject(obj, keys) {
     for (const key of keys) {
         obj[key] ??= {};
@@ -330,10 +415,17 @@ function ensureNestedObject(obj, keys) {
     return obj;
 }
 
+/**
+ * @param {Glosses|undefined} glosses
+ * @param {string} word 
+ * @param {string} pos 
+ */
 function processEnglishInflectionGlosses(glosses, word, pos) {
-    if(!glosses) return;
-    glossPieces = glosses.flatMap(gloss => gloss.split('##').map(piece => piece.trim()));
+    if(!glosses || !Array.isArray(glosses)) return;
+    const glossPieces = glosses.flatMap(gloss => gloss.split('##').map(piece => piece.trim()));
+    /**  @type {Set<string>} */
     const lemmas = new Set();
+    /**  @type {Set<string>} */
     const inflections = new Set();
     for (const piece of glossPieces) {
         const lemmaMatch = piece.match(/of ([^\s]+)\s*$/);
@@ -371,6 +463,10 @@ function processEnglishInflectionGlosses(glosses, word, pos) {
     }
 }
 
+/**
+ * @param {KaikkiLine} line
+ * @returns {string|undefined}
+ */
 function getCanonicalWordForm({word, forms}) {
     if(!forms) return word;
 
@@ -389,6 +485,11 @@ function getCanonicalWordForm({word, forms}) {
     }
 }
 
+/**
+ * @param {string|undefined} word 
+ * @param {FormInfo[]} forms 
+ * @returns {string|undefined}
+ */
 function getCanonicalForm(word, forms) {
     const canonicalForm = forms.find(form => form.tags &&
         form.tags.includes('canonical')
@@ -400,7 +501,7 @@ function getCanonicalForm(word, forms) {
             word = word.replace(/ {{#if:.+/, '').trim();
         }
 
-        bracketsRegex = /\[.*\]$/;
+        const bracketsRegex = /\[.*\]$/;
         if (bracketsRegex.test(word)) {
             word = word.replace(bracketsRegex, '').trim();
         }
@@ -408,6 +509,11 @@ function getCanonicalForm(word, forms) {
     return word;
 }
 
+/**
+ * @param {string} word 
+ * @param {KaikkiLine} line 
+ * @returns {string[]}
+ */
 function getReadings(word, line){
     switch(sourceIso){
         case 'fa': return [getPersianReading(word, line)];
@@ -417,13 +523,23 @@ function getReadings(word, line){
     }
 }
 
+/**
+ * @param {string} word - returned unchanged when no romanization form is found
+ * @param {KaikkiLine} line - entry whose `forms` list is searched
+ * @returns {string} the first non-empty form whose only tag is 'romanization', otherwise `word`
+ */
 function getPersianReading(word, line){
     const {forms} = line;
     if(!forms) return word;
     const romanization = forms.find(({form, tags}) => tags && tags.includes('romanization') && tags.length === 1 && form);
-    return romanization ? romanization.form : word;
+    return romanization?.form || word;
 }
 
+/**
+ * @param {string} word 
+ * @param {KaikkiLine} line 
+ * @returns {string[]}
+ */
 function getJapaneseReadings(word, line){
     const {head_templates} = line;
     if(!head_templates) {
@@ -507,13 +623,14 @@ lr.on('end', () => {
 
     const formsFilePath = `${writeFolder}/${sourceIso}-${targetIso}-forms.json`;
 
+    /** @type {{[chunkIndex: string]: FormsMap}} */
     const mapChunks = Array.from(formsMap.entries()).reduce((acc, [key, value], index) => {
         logProgress("Chunking form dict", index, formsMap.size);
         const chunkIndex = Math.floor(index / 10000);
         acc[chunkIndex] ??= new Map();
         acc[chunkIndex].set(key, value);
         return acc;
-    }, {});
+    }, /** @type {{[chunkIndex: string]: FormsMap}} */ ({}));
     
     if(!mapChunks['0']) {
         mapChunks['0'] = new Map();
diff --git a/4-make-yomitan.js b/4-make-yomitan.js
index 32a9e47..5942af4 100644
--- a/4-make-yomitan.js
+++ b/4-make-yomitan.js
@@ -1,3 +1,4 @@
+//@ts-nocheck
 const path = require('path');
 const { readFileSync, writeFileSync, existsSync, readdirSync, mkdirSync, unlinkSync } = require('fs');
 const { sortTags, writeInBatches, consoleOverwrite, 
diff --git a/data/test/dict/sq/en/tag_bank_1.json b/data/test/dict/sq/en/tag_bank_1.json
index 0143cd3..895bfd1 100644
--- a/data/test/dict/sq/en/tag_bank_1.json
+++ b/data/test/dict/sq/en/tag_bank_1.json
@@ -12,5 +12,12 @@
     -1,
     "masculine",
     1
+  ],
+  [
+    "fem",
+    "",
+    -1,
+    "feminine",
+    1
   ]
 ]
\ No newline at end of file
diff --git a/data/test/dict/sq/en/term_bank_1.json b/data/test/dict/sq/en/term_bank_1.json
index d9ba885..4672f1e 100644
--- a/data/test/dict/sq/en/term_bank_1.json
+++ b/data/test/dict/sq/en/term_bank_1.json
@@ -10,5 +10,174 @@
     ],
     0,
     ""
+  ],
+  [
+    "gjuhë",
+    "",
+    "n fem",
+    "n",
+    0,
+    [
+      {
+        "type": "structured-content",
+        "content": [
+          {
+            "tag": "div",
+            "data": {
+              "listType": "li"
+            },
+            "content": "tongue (organ)"
+          },
+          {
+            "tag": "div",
+            "data": {
+              "listType": "ol"
+            },
+            "style": {
+              "marginLeft": 2
+            },
+            "content": [
+              {
+                "tag": "div",
+                "data": {
+                  "listType": "li"
+                },
+                "content": [
+                  {
+                    "tag": "span",
+                    "data": {
+                      "listType": "number"
+                    },
+                    "content": "1. "
+                  },
+                  "(figurative) speech, talking"
+                ]
+              },
+              {
+                "tag": "div",
+                "data": {
+                  "listType": "li"
+                },
+                "content": [
+                  {
+                    "tag": "span",
+                    "data": {
+                      "listType": "number"
+                    },
+                    "content": "2. "
+                  },
+                  "strip of land"
+                ]
+              },
+              {
+                "tag": "div",
+                "data": {
+                  "listType": "li"
+                },
+                "content": [
+                  {
+                    "tag": "span",
+                    "data": {
+                      "listType": "number"
+                    },
+                    "content": "3. "
+                  },
+                  "bell clapper, clanger, tongue"
+                ]
+              }
+            ]
+          }
+        ]
+      },
+      {
+        "type": "structured-content",
+        "content": [
+          {
+            "tag": "div",
+            "data": {
+              "listType": "li"
+            },
+            "content": "language, tongue"
+          },
+          {
+            "tag": "div",
+            "data": {
+              "listType": "ol"
+            },
+            "style": {
+              "marginLeft": 2
+            },
+            "content": [
+              {
+                "tag": "div",
+                "data": {
+                  "listType": "li"
+                },
+                "content": [
+                  {
+                    "tag": "span",
+                    "data": {
+                      "listType": "number"
+                    },
+                    "content": "1. "
+                  },
+                  "register, speech, style"
+                ]
+              },
+              {
+                "tag": "div",
+                "data": {
+                  "listType": "li"
+                },
+                "content": [
+                  {
+                    "tag": "span",
+                    "data": {
+                      "listType": "number"
+                    },
+                    "content": "2. "
+                  },
+                  "language (generally, any form of communication)"
+                ]
+              },
+              {
+                "tag": "div",
+                "data": {
+                  "listType": "li"
+                },
+                "content": [
+                  {
+                    "tag": "span",
+                    "data": {
+                      "listType": "number"
+                    },
+                    "content": "3. "
+                  },
+                  "(colloquial) local dialect"
+                ]
+              },
+              {
+                "tag": "div",
+                "data": {
+                  "listType": "li"
+                },
+                "content": [
+                  {
+                    "tag": "span",
+                    "data": {
+                      "listType": "number"
+                    },
+                    "content": "4. "
+                  },
+                  "(colloquial) Albanian, as a subject in school"
+                ]
+              }
+            ]
+          }
+        ]
+      }
+    ],
+    0,
+    ""
   ]
 ]
\ No newline at end of file
diff --git a/data/test/dict/sq/en/term_bank_2.json b/data/test/dict/sq/en/term_bank_2.json
index 7f01320..f9af542 100644
--- a/data/test/dict/sq/en/term_bank_2.json
+++ b/data/test/dict/sq/en/term_bank_2.json
@@ -34,5 +34,202 @@
     ],
     0,
     ""
+  ],
+  [
+    "gjuhëra／gjuhëna",
+    "",
+    "non-lemma",
+    "",
+    0,
+    [
+      [
+        "gjuhë",
+        [
+          "plural",
+          "dialectal"
+        ]
+      ]
+    ],
+    0,
+    ""
+  ],
+  [
+    "gjuha",
+    "",
+    "non-lemma",
+    "",
+    0,
+    [
+      [
+        "gjuhë",
+        [
+          "definite"
+        ]
+      ],
+      [
+        "gjuhë",
+        [
+          "nominative",
+          "singular",
+          "definite"
+        ]
+      ]
+    ],
+    0,
+    ""
+  ],
+  [
+    "gjuhët",
+    "",
+    "non-lemma",
+    "",
+    0,
+    [
+      [
+        "gjuhë",
+        [
+          "accusative",
+          "plural",
+          "definite"
+        ]
+      ],
+      [
+        "gjuhë",
+        [
+          "nominative",
+          "plural",
+          "definite"
+        ]
+      ]
+    ],
+    0,
+    ""
+  ],
+  [
+    "gjuhën",
+    "",
+    "non-lemma",
+    "",
+    0,
+    [
+      [
+        "gjuhë",
+        [
+          "accusative",
+          "singular",
+          "definite"
+        ]
+      ]
+    ],
+    0,
+    ""
+  ],
+  [
+    "gjuhe",
+    "",
+    "non-lemma",
+    "",
+    0,
+    [
+      [
+        "gjuhë",
+        [
+          "singular",
+          "indefinite",
+          "ablative"
+        ]
+      ],
+      [
+        "gjuhë",
+        [
+          "dative",
+          "singular",
+          "indefinite"
+        ]
+      ]
+    ],
+    0,
+    ""
+  ],
+  [
+    "gjuhës",
+    "",
+    "non-lemma",
+    "",
+    0,
+    [
+      [
+        "gjuhë",
+        [
+          "singular",
+          "definite",
+          "ablative"
+        ]
+      ],
+      [
+        "gjuhë",
+        [
+          "dative",
+          "singular",
+          "definite"
+        ]
+      ]
+    ],
+    0,
+    ""
+  ],
+  [
+    "gjuhëve",
+    "",
+    "non-lemma",
+    "",
+    0,
+    [
+      [
+        "gjuhë",
+        [
+          "plural",
+          "definite",
+          "ablative"
+        ]
+      ],
+      [
+        "gjuhë",
+        [
+          "dative",
+          "plural",
+          "definite"
+        ]
+      ],
+      [
+        "gjuhë",
+        [
+          "dative",
+          "plural",
+          "indefinite"
+        ]
+      ]
+    ],
+    0,
+    ""
+  ],
+  [
+    "gjuhësh",
+    "",
+    "non-lemma",
+    "",
+    0,
+    [
+      [
+        "gjuhë",
+        [
+          "plural",
+          "indefinite",
+          "ablative"
+        ]
+      ]
+    ],
+    0,
+    ""
   ]
 ]
\ No newline at end of file
diff --git a/data/test/ipa/sq/en/tag_bank_1.json b/data/test/ipa/sq/en/tag_bank_1.json
index 0637a08..0dd99bc 100644
--- a/data/test/ipa/sq/en/tag_bank_1.json
+++ b/data/test/ipa/sq/en/tag_bank_1.json
@@ -1 +1,9 @@
-[]
\ No newline at end of file
+[
+  [
+    "Gheg",
+    "dialect",
+    0,
+    "Gheg",
+    0
+  ]
+]
\ No newline at end of file
diff --git a/data/test/ipa/sq/en/term_meta_bank_1.json b/data/test/ipa/sq/en/term_meta_bank_1.json
index 27aa487..32e1f48 100644
--- a/data/test/ipa/sq/en/term_meta_bank_1.json
+++ b/data/test/ipa/sq/en/term_meta_bank_1.json
@@ -11,5 +11,51 @@
         }
       ]
     }
+  ],
+  [
+    "gjuhë",
+    "ipa",
+    {
+      "reading": "gjuhë",
+      "transcriptions": [
+        {
+          "ipa": "/ˈɟuhə/",
+          "tags": []
+        },
+        {
+          "ipa": "[ˈɡjuː(h)]",
+          "tags": [
+            "Gheg",
+            "Northern"
+          ]
+        },
+        {
+          "ipa": "[ˈɡuː(h)]",
+          "tags": [
+            "Gheg",
+            "Northern"
+          ]
+        },
+        {
+          "ipa": "[ˈɡũː]",
+          "tags": [
+            "Kosovo"
+          ]
+        },
+        {
+          "ipa": "[ˈɡʎuhə]",
+          "tags": [
+            "Arbëresh",
+            "Arvanitika"
+          ]
+        },
+        {
+          "ipa": "[ˈɡʎuɣə]",
+          "tags": [
+            "Calabria"
+          ]
+        }
+      ]
+    }
   ]
 ]
\ No newline at end of file
diff --git a/data/test/kaikki/sq-en.json b/data/test/kaikki/sq-en.json
index b6169d9..1c2e7cd 100644
--- a/data/test/kaikki/sq-en.json
+++ b/data/test/kaikki/sq-en.json
@@ -1,2 +1,3 @@
 {"pos": "noun", "head_templates": [{"name": "head", "args": {"1": "sq", "2": "noun", "head": "", "sort": "", "g": "m", "cat2": "masculine nouns"}, "expansion": "akull m"}, {"name": "sq-noun", "args": {"1": "m", "2": "akuj"}, "expansion": "akull m (plural akuj)"}], "forms": [{"form": "akuj", "tags": ["plural"]}], "etymology_number": 1, "wikipedia": ["Vladimir Orel"], "etymology_text": "Uncertain. Possibly:\n# A derivation from Proto-Indo-European *keHl- whence also Proto-Celtic *kaletos (“hard”), Proto-Slavic *kaliti (“to temper, harden”), Latin callum (“hardened skin”).\n# Borrowed from Germanic, ultimately from Proto-Germanic *jekulaz (“icicle”).\n# Akin Old Armenian ոյծ (oyc, “cold, frost”), suffixed with -ull, though the two terms are phonologically incompatible.", "etymology_templates": [{"name": "unc", "args": {"1": "sq"}, "expansion": "Uncertain"}, {"name": "der", "args": {"1": "sq", "2": "ine-pro", "3": "", "4": "*keHl-"}, "expansion": "Proto-Indo-European *keHl-"}, {"name": "cog", "args": {"1": "cel-pro", "2": "*kaletos", "t": "hard"}, "expansion": "Proto-Celtic *kaletos (“hard”)"}, {"name": "cog", "args": {"1": "sla-pro", "2": "*kaliti", "t": "to temper, harden"}, "expansion": "Proto-Slavic *kaliti (“to temper, harden”)"}, {"name": "cog", "args": {"1": "la", "2": "callum", "t": "hardened skin"}, "expansion": "Latin callum (“hardened skin”)"}, {"name": "glossary", "args": {"1": "loanword", "2": "Borrowed"}, "expansion": "Borrowed"}, {"name": "bor", "args": {"1": "sq", "2": "gem", "3": "", "4": "", "5": "", "lit": "", "pos": "", "tr": "", "ts": "", "id": "", "sc": "", "g": "", "g2": "", "g3": "", "nocat": "", "sort": ""}, "expansion": "Germanic"}, {"name": "bor+", "args": {"1": "sq", "2": "gem"}, "expansion": "Borrowed from Germanic"}, {"name": "der", "args": {"1": "sq", "2": "gem-pro", "3": "*jekulaz", "t": "icicle"}, "expansion": "Proto-Germanic *jekulaz (“icicle”)"}, {"name": "cog", "args": {"1": "xcl", "2": "ոյծ", "t": "cold, frost"}, "expansion": "Old 
Armenian ոյծ (oyc, “cold, frost”)"}, {"name": "af", "args": {"1": "sq", "2": "-ull"}, "expansion": "-ull"}], "sounds": [{"ipa": "/ˈakuɫ/"}, {"rhymes": "-akuɫ"}], "word": "akull", "lang": "Albanian", "lang_code": "sq", "senses": [{"links": [["ice", "ice"]], "glosses": ["ice"], "tags": ["masculine"], "id": "akull-sq-noun-TLCyUMYl"}]}
-{"pos": "verb", "head_templates": [{"name": "head", "args": {"1": "sq", "2": "verb form"}, "expansion": "ik"}], "word": "ik", "lang": "Albanian", "lang_code": "sq", "senses": [{"links": [["iki", "iki#Albanian"]], "glosses": ["second-person singular imperative of iki"], "tags": ["form-of", "imperative", "second-person", "singular"], "form_of": [{"word": "iki"}], "id": "ik-sq-verb-OAavRVQB", "categories": []}]}
\ No newline at end of file
+{"pos": "verb", "head_templates": [{"name": "head", "args": {"1": "sq", "2": "verb form"}, "expansion": "ik"}], "word": "ik", "lang": "Albanian", "lang_code": "sq", "senses": [{"links": [["iki", "iki#Albanian"]], "glosses": ["second-person singular imperative of iki"], "tags": ["form-of", "imperative", "second-person", "singular"], "form_of": [{"word": "iki"}], "id": "ik-sq-verb-OAavRVQB", "categories": []}]}
+{"pos": "noun", "forms": [{"form": "gjuhë", "tags": ["plural"]}, {"form": "gjuhëra／gjuhëna", "tags": ["dialectal", "plural"]}, {"form": "gjuha", "tags": ["definite"]}, {"form": "no-table-tags", "source": "declension", "tags": ["table-tags"]}, {"form": "sq-noun-f", "source": "declension", "tags": ["inflection-template"]}, {"form": "gjuhë", "tags": ["indefinite", "nominative", "singular"], "source": "declension"}, {"form": "gjuha", "tags": ["definite", "nominative", "singular"], "source": "declension"}, {"form": "gjuhë", "tags": ["indefinite", "nominative", "plural"], "source": "declension"}, {"form": "gjuhët", "tags": ["definite", "nominative", "plural"], "source": "declension"}, {"form": "gjuhë", "tags": ["accusative", "indefinite", "singular"], "source": "declension"}, {"form": "gjuhën", "tags": ["accusative", "definite", "singular"], "source": "declension"}, {"form": "gjuhë", "tags": ["accusative", "indefinite", "plural"], "source": "declension"}, {"form": "gjuhët", "tags": ["accusative", "definite", "plural"], "source": "declension"}, {"form": "gjuhe", "tags": ["dative", "indefinite", "singular"], "source": "declension"}, {"form": "gjuhës", "tags": ["dative", "definite", "singular"], "source": "declension"}, {"form": "gjuhëve", "tags": ["dative", "indefinite", "plural"], "source": "declension"}, {"form": "gjuhëve", "tags": ["dative", "definite", "plural"], "source": "declension"}, {"form": "gjuhe", "tags": ["ablative", "indefinite", "singular"], "source": "declension"}, {"form": "gjuhës", "tags": ["ablative", "definite", "singular"], "source": "declension"}, {"form": "gjuhësh", "tags": ["ablative", "indefinite", "plural"], "source": "declension"}, {"form": "gjuhëve", "tags": ["ablative", "definite", "plural"], "source": "declension"}], "inflection_templates": [{"name": "sq-decl-noun", "args": {"1": "gjuhë", "2": "gjuhë", "3": "gjuha", "4": "gjuhët", "5": "gjuhë", "6": "gjuhë", "7": "gjuhën", "8": "gjuhët", "9": "gjuhe", "10": "gjuhëve", "11": "gjuhës", "12": 
"gjuhëve", "13": "gjuhësh"}}], "sounds": [{"ipa": "/ˈɟuhə/"}, {"tags": ["Gheg", "Northern"], "ipa": "[ˈɡjuː(h)]"}, {"tags": ["Gheg", "Northern"], "ipa": "[ˈɡuː(h)]"}, {"tags": ["Kosovo"], "ipa": "[ˈɡũː]"}, {"tags": ["Arbëresh", "Arvanitika"], "ipa": "[ˈɡʎuhə]"}, {"note": "Calabria", "ipa": "[ˈɡʎuɣə]"}, {"rhymes": "-uhə"}], "wikipedia": ["Vladimir Orel"], "etymology_text": "Unclear. Akin to Arbëresh glunzë (“voice”). Possibilities include:\n# Inherited from Proto-Indo-European *gol(H)-s-os, via a byform *gl̥(H)-s-ós, whence also Proto-Slavic *golsъ (“voice”), Lithuanian gal̃sas (“voice”), Proto-Germanic *kalz-ōną (“to call”). However the medial -h- instead of expected **-sh- is left unexplained.\n# From a byform *ǵʰnud-sḱ-eh₂, doubly methasised from Proto-Indo-European *dn̥ǵʰwéh₂s ~ *dn̥ǵʰuh₂és (“tongue”). Compare Tocharian B kantwo, also metathised. The outcome gl- (and later gj-) from original *ǵ(ʰ)n- is also attested in gju (“knee”). The usage of the infixed *-sḱ- does not seem have any parallels.\n# A connection with Ancient Greek γλῶσσα (glôssa), itself of unclear origin, cannot be proven.", "etymology_templates": [{"name": "unk", "args": {"1": "sq", "2": "Unclear"}, "expansion": "Unclear"}, {"name": "glossary", "args": {"1": "Inherited"}, "expansion": "Inherited"}, {"name": "inh", "args": {"1": "sq", "2": "ine-pro", "3": "", "4": "*gol(H)-s-os", "5": "", "lit": "", "pos": "", "tr": "", "ts": "", "id": "", "sc": "", "g": "", "g2": "", "g3": "", "nocat": "", "sort": ""}, "expansion": "Proto-Indo-European *gol(H)-s-os"}, {"name": "inh+", "args": {"1": "sq", "2": "ine-pro", "3": "", "4": "*gol(H)-s-os"}, "expansion": "Inherited from Proto-Indo-European *gol(H)-s-os"}, {"name": "cog", "args": {"1": "sla-pro", "2": "*golsъ", "t": "voice"}, "expansion": "Proto-Slavic *golsъ (“voice”)"}, {"name": "cog", "args": {"1": "lt", "2": "gal̃sas", "t": "voice"}, "expansion": "Lithuanian gal̃sas (“voice”)"}, {"name": "cog", "args": {"1": "gem-pro", "2": "*kalzōną", "3": 
"*kalz-ōną", "t": "to call"}, "expansion": "Proto-Germanic *kalz-ōną (“to call”)"}, {"name": "der", "args": {"1": "sq", "2": "ine-pro", "3": "*dn̥ǵʰwéh₂s", "4": "*dn̥ǵʰwéh₂s ~ *dn̥ǵʰuh₂és", "t": "tongue"}, "expansion": "Proto-Indo-European *dn̥ǵʰwéh₂s ~ *dn̥ǵʰuh₂és (“tongue”)"}, {"name": "cog", "args": {"1": "txb", "2": "kantwo"}, "expansion": "Tocharian B kantwo"}, {"name": "cog", "args": {"1": "grc", "2": "γλῶσσα"}, "expansion": "Ancient Greek γλῶσσα (glôssa)"}], "word": "gjuhë", "lang": "Albanian", "lang_code": "sq", "synonyms": [{"tags": ["obsolete"], "word": "gluhë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "now Cham", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "Arbëresh", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "Arvanitika", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"tags": ["obsolete"], "word": "gjuhu", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "Gheg", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhunë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"tags": ["Gheg"], "word": "gjuhënë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"tags": ["Gheg", "Northern"], "word": "guhë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"tags": ["dialectal"], "word": "gû", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "Kosovo", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gju — Borgo Erizzo", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"english": "Sicily", "word": "gëluhë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"english": "Calabria", "word": "gjufë", "_dis1": "0 0 0 0 0 0 0 0 0"}], "derived": [{"word": "dygjuhësi", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "dygjuhësh", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhcë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhësi", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhësisht", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhësor", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëtar", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëz", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëzoj", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhor", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "shumëgjuhësh", "_dis1": "0 0 0 0 0 0 0 0 0"}, 
{"word": "gjuhë letrare", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhë nëne", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëbilbil", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëbrisk", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëçarë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëdele", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëdreri", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëgjarpër", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëgjatë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëhelm", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëkrijues", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëkuq", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlashtë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlëshuar", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlidhur", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlopatë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlopë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëllomkë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëmbajtur", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëmite", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëmpirë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëmprehtë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhënepërkë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhënuse", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëpremë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëprerë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëqen", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhërrënduar", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëshkurtër", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëshkurtuar", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëshpatë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhështhurur", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëtrashë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëtharë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëthikë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëzënë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhujëse", "_dis1": 
"0 0 0 0 0 0 0 0 0"}, {"word": "gjuhustër", "_dis1": "0 0 0 0 0 0 0 0 0"}], "senses": [{"examples": [{"text": "Mbaje gjuhën!", "english": "Hold your tongue!", "type": "example"}, {"text": "E ka gjuhën të gjatë.", "english": "(literally, “She has a long tongue.”)", "type": "example", "roman": "She is very talkative."}], "links": [["tongue", "tongue"], ["speech", "speech"], ["talking", "talking"]], "raw_glosses": ["tongue (organ)", "(figurative) speech, talking"], "glosses": ["tongue (organ)", "speech, talking"], "synonyms": [{"word": "gojë"}], "tags": ["feminine", "figuratively"], "id": "en-gjuhë-sq-noun-4U3OJriL", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, {"name": "Body parts", "kind": "topical", "parents": ["Body", "Anatomy", "All topics", "Biology", "Medicine", "Fundamental", "Sciences", "Healthcare", "Health"], "source": "w", "orig": "sq:Body parts", "langcode": "sq"}]}, {"links": [["tongue", "tongue"], ["strip", "strip"], ["land", "land"]], "glosses": ["tongue (organ)", "strip of land"], "synonyms": [{"word": "rrip"}], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-Tfx~l-b2", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, {"name": "Body parts", "kind": "topical", "parents": ["Body", "Anatomy", "All topics", "Biology", "Medicine", "Fundamental", "Sciences", "Healthcare", "Health"], "source": "w", "orig": "sq:Body parts", "langcode": "sq"}]}, {"links": [["tongue", "tongue"], ["bell", "bell"], ["clapper", "clapper"], ["clanger", "clanger"]], "glosses": ["tongue (organ)", "bell clapper, clanger, tongue"], "synonyms": [{"word": "gjuhëz"}], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-zu-bA4a3", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, 
{"name": "Body parts", "kind": "topical", "parents": ["Body", "Anatomy", "All topics", "Biology", "Medicine", "Fundamental", "Sciences", "Healthcare", "Health"], "source": "w", "orig": "sq:Body parts", "langcode": "sq"}]}, {"examples": [{"text": "gjuhë lope e zier", "english": "boiled beef tongue", "type": "example"}, {"text": "Dogji gjuhën.", "english": "I burned my tongue.", "type": "example"}], "links": [["tongue", "tongue"]], "glosses": ["tongue (organ)"], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-r4b272FF", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, {"name": "Body parts", "kind": "topical", "parents": ["Body", "Anatomy", "All topics", "Biology", "Medicine", "Fundamental", "Sciences", "Healthcare", "Health"], "source": "w", "orig": "sq:Body parts", "langcode": "sq"}]}, {"examples": [{"text": "gjuha e fëmijëve", "english": "children speech", "type": "example"}, {"text": "gjuhë e trashë", "english": "foul language", "type": "example"}, {"text": "gjuha e shkrimtarit", "english": "the author's style", "type": "example"}], "links": [["language", "language"], ["tongue", "tongue"], ["register", "register"], ["speech", "speech"], ["style", "style"]], "glosses": ["language, tongue", "register, speech, style"], "synonyms": [{"word": "ligjërim"}, {"word": "stil"}], "tags": ["feminine"], "id": "en-gjuhë-sq-noun--CHs0sns", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}]}, {"examples": [{"text": "gjuha e muzikës", "english": "music's language", "type": "example"}, {"text": "gjuha e bletëve", "english": "bees' language", "type": "example"}], "links": [["language", "language"], ["tongue", "tongue"]], "glosses": ["language, tongue", "language (generally, any form of communication)"], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-nlIefoUV", "categories": 
[{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}]}, {"links": [["language", "language"], ["tongue", "tongue"], ["dialect", "dialect"]], "raw_glosses": ["language, tongue", "(colloquial) local dialect"], "glosses": ["language, tongue", "local dialect"], "synonyms": [{"word": "e folme"}, {"word": "dialekt"}], "tags": ["colloquial", "feminine"], "id": "en-gjuhë-sq-noun-mWdoYa8o", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}]}, {"links": [["language", "language"], ["tongue", "tongue"], ["Albanian", "Albanian"], ["subject", "subject"]], "raw_glosses": ["language, tongue", "(colloquial) Albanian, as a subject in school"], "glosses": ["language, tongue", "Albanian, as a subject in school"], "tags": ["colloquial", "feminine"], "id": "en-gjuhë-sq-noun-7CKeEbtj", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, {"name": "Albanian entries with incorrect language header", "kind": "other", "parents": ["Entries with incorrect language header", "Entry maintenance"], "source": "w+disamb", "_dis": "3 3 4 3 16 16 16 23 16"}, {"name": "Pages with 1 entry", "kind": "other", "parents": [], "source": "w+disamb", "_dis": "4 2 7 2 9 10 9 48 9"}, {"name": "Pages with entries", "kind": "other", "parents": [], "source": "w+disamb", "_dis": "4 2 3 2 9 15 9 47 9"}]}, {"examples": [{"text": "gjuha shqipe", "english": "the Albanian language", "type": "example"}], "links": [["language", "language"], ["tongue", "tongue"]], "glosses": ["language, tongue"], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-GSYYUYQQ", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}]}]}
\ No newline at end of file
diff --git a/data/test/tidy/sq-en-forms-0.json b/data/test/tidy/sq-en-forms-0.json
index bde81ba..76f289c 100644
--- a/data/test/tidy/sq-en-forms-0.json
+++ b/data/test/tidy/sq-en-forms-0.json
@@ -44,6 +44,132 @@
           ]
         ]
       }
+    ],
+    [
+      "gjuhë",
+      {
+        "_type": "map",
+        "map": [
+          [
+            "gjuhëra／gjuhëna",
+            {
+              "_type": "map",
+              "map": [
+                [
+                  "noun",
+                  [
+                    "plural dialectal"
+                  ]
+                ]
+              ]
+            }
+          ],
+          [
+            "gjuha",
+            {
+              "_type": "map",
+              "map": [
+                [
+                  "noun",
+                  [
+                    "definite",
+                    "nominative singular definite"
+                  ]
+                ]
+              ]
+            }
+          ],
+          [
+            "gjuhët",
+            {
+              "_type": "map",
+              "map": [
+                [
+                  "noun",
+                  [
+                    "accusative plural definite",
+                    "nominative plural definite"
+                  ]
+                ]
+              ]
+            }
+          ],
+          [
+            "gjuhën",
+            {
+              "_type": "map",
+              "map": [
+                [
+                  "noun",
+                  [
+                    "accusative singular definite"
+                  ]
+                ]
+              ]
+            }
+          ],
+          [
+            "gjuhe",
+            {
+              "_type": "map",
+              "map": [
+                [
+                  "noun",
+                  [
+                    "singular indefinite ablative",
+                    "dative singular indefinite"
+                  ]
+                ]
+              ]
+            }
+          ],
+          [
+            "gjuhës",
+            {
+              "_type": "map",
+              "map": [
+                [
+                  "noun",
+                  [
+                    "singular definite ablative",
+                    "dative singular definite"
+                  ]
+                ]
+              ]
+            }
+          ],
+          [
+            "gjuhëve",
+            {
+              "_type": "map",
+              "map": [
+                [
+                  "noun",
+                  [
+                    "plural definite ablative",
+                    "dative plural definite",
+                    "dative plural indefinite"
+                  ]
+                ]
+              ]
+            }
+          ],
+          [
+            "gjuhësh",
+            {
+              "_type": "map",
+              "map": [
+                [
+                  "noun",
+                  [
+                    "plural indefinite ablative"
+                  ]
+                ]
+              ]
+            }
+          ]
+        ]
+      }
     ]
   ]
 }
\ No newline at end of file
diff --git a/data/test/tidy/sq-en-lemmas.json b/data/test/tidy/sq-en-lemmas.json
index 1956d77..b8482da 100644
--- a/data/test/tidy/sq-en-lemmas.json
+++ b/data/test/tidy/sq-en-lemmas.json
@@ -20,5 +20,224 @@
         ]
       }
     }
+  },
+  "gjuhë": {
+    "gjuhë": {
+      "noun": {
+        "ipa": [
+          {
+            "ipa": "/ˈɟuhə/",
+            "tags": []
+          },
+          {
+            "ipa": "[ˈɡjuː(h)]",
+            "tags": [
+              "Gheg",
+              "Northern"
+            ]
+          },
+          {
+            "ipa": "[ˈɡuː(h)]",
+            "tags": [
+              "Gheg",
+              "Northern"
+            ]
+          },
+          {
+            "ipa": "[ˈɡũː]",
+            "tags": [
+              "Kosovo"
+            ]
+          },
+          {
+            "ipa": "[ˈɡʎuhə]",
+            "tags": [
+              "Arbëresh",
+              "Arvanitika"
+            ]
+          },
+          {
+            "ipa": "[ˈɡʎuɣə]",
+            "tags": [
+              "Calabria"
+            ]
+          }
+        ],
+        "senses": [
+          {
+            "glosses": [
+              {
+                "type": "structured-content",
+                "content": [
+                  {
+                    "tag": "div",
+                    "data": {
+                      "listType": "li"
+                    },
+                    "content": "tongue (organ)"
+                  },
+                  {
+                    "tag": "div",
+                    "data": {
+                      "listType": "ol"
+                    },
+                    "style": {
+                      "marginLeft": 2
+                    },
+                    "content": [
+                      {
+                        "tag": "div",
+                        "data": {
+                          "listType": "li"
+                        },
+                        "content": [
+                          {
+                            "tag": "span",
+                            "data": {
+                              "listType": "number"
+                            },
+                            "content": "1. "
+                          },
+                          "(figurative) speech, talking"
+                        ]
+                      },
+                      {
+                        "tag": "div",
+                        "data": {
+                          "listType": "li"
+                        },
+                        "content": [
+                          {
+                            "tag": "span",
+                            "data": {
+                              "listType": "number"
+                            },
+                            "content": "2. "
+                          },
+                          "strip of land"
+                        ]
+                      },
+                      {
+                        "tag": "div",
+                        "data": {
+                          "listType": "li"
+                        },
+                        "content": [
+                          {
+                            "tag": "span",
+                            "data": {
+                              "listType": "number"
+                            },
+                            "content": "3. "
+                          },
+                          "bell clapper, clanger, tongue"
+                        ]
+                      }
+                    ]
+                  }
+                ]
+              }
+            ],
+            "tags": [
+              "feminine"
+            ]
+          },
+          {
+            "glosses": [
+              {
+                "type": "structured-content",
+                "content": [
+                  {
+                    "tag": "div",
+                    "data": {
+                      "listType": "li"
+                    },
+                    "content": "language, tongue"
+                  },
+                  {
+                    "tag": "div",
+                    "data": {
+                      "listType": "ol"
+                    },
+                    "style": {
+                      "marginLeft": 2
+                    },
+                    "content": [
+                      {
+                        "tag": "div",
+                        "data": {
+                          "listType": "li"
+                        },
+                        "content": [
+                          {
+                            "tag": "span",
+                            "data": {
+                              "listType": "number"
+                            },
+                            "content": "1. "
+                          },
+                          "register, speech, style"
+                        ]
+                      },
+                      {
+                        "tag": "div",
+                        "data": {
+                          "listType": "li"
+                        },
+                        "content": [
+                          {
+                            "tag": "span",
+                            "data": {
+                              "listType": "number"
+                            },
+                            "content": "2. "
+                          },
+                          "language (generally, any form of communication)"
+                        ]
+                      },
+                      {
+                        "tag": "div",
+                        "data": {
+                          "listType": "li"
+                        },
+                        "content": [
+                          {
+                            "tag": "span",
+                            "data": {
+                              "listType": "number"
+                            },
+                            "content": "3. "
+                          },
+                          "(colloquial) local dialect"
+                        ]
+                      },
+                      {
+                        "tag": "div",
+                        "data": {
+                          "listType": "li"
+                        },
+                        "content": [
+                          {
+                            "tag": "span",
+                            "data": {
+                              "listType": "number"
+                            },
+                            "content": "4. "
+                          },
+                          "(colloquial) Albanian, as a subject in school"
+                        ]
+                      }
+                    ]
+                  }
+                ]
+              }
+            ],
+            "tags": [
+              "feminine"
+            ]
+          }
+        ]
+      }
+    }
   }
 }
\ No newline at end of file
diff --git a/jsconfig.json b/jsconfig.json
new file mode 100644
index 0000000..609e7cc
--- /dev/null
+++ b/jsconfig.json
@@ -0,0 +1,21 @@
+{
+    "compilerOptions": {
+        "module": "ES2022",
+        "target": "ES2022",
+        "checkJs": true,
+        "strict": true,
+        "strictNullChecks": true,
+        "noImplicitAny": true,
+        "strictPropertyInitialization": true,
+        "suppressImplicitAnyIndexErrors": false,
+        "paths": { // "paths" is a compilerOptions member; it is not read at the top level
+            "*": ["./types/*"],
+            "ext/json-schema": ["./types/ext/json-schema"]
+        }
+    },
+    "exclude": [
+        "node_modules",
+        "**/node_modules/*"
+    ]
+}
+  
\ No newline at end of file
diff --git a/types/types.ts b/types/types.ts
new file mode 100644
index 0000000..ab1e7ab
--- /dev/null
+++ b/types/types.ts
@@ -0,0 +1,134 @@
+/**
+ * Ambient (global) type declarations used by the JSDoc annotations in the
+ * JavaScript sources. This file contains types only.
+ */
+declare global {
+    /** Settings that 3-tidy-up.js destructures at the top of the script. */
+    type TidyEnv = {
+        source_iso: string,
+        target_iso: string,
+        kaikki_file: string,
+        tidy_folder: string,
+    }
+
+    /** One entry of a kaikki dump (the dump stores one JSON object per line). */
+    type KaikkiLine = {
+        head_templates?: HeadTemplate[];
+        word?: string;
+        pos?: string;
+        sounds?: Sound[];
+        forms?: FormInfo[];
+        senses?: KaikkiSense[];
+    }
+
+    /** A headword template from `KaikkiLine.head_templates`. */
+    type HeadTemplate = {
+        name?: string;
+        // Template arguments arrive as an object keyed by position ("1", "2")
+        // or by parameter name ("head", "g"), not as an array.
+        args?: Record<string, string>;
+        expansion?: string;
+    }
+
+    /** A pronunciation record; some records (e.g. rhymes) carry no `ipa`. */
+    type Sound = {
+        ipa?: string|string[];
+        tags?: string[];
+        note?: string;
+    }
+
+    /** An inflected form from `KaikkiLine.forms` together with its tags. */
+    type FormInfo = {
+        form?: string;
+        tags?: string[];
+    }
+
+    /** A single sense from `KaikkiLine.senses`. */
+    type KaikkiSense = {
+        glosses?: Glosses;
+        raw_glosses?: Glosses;
+        raw_gloss?: Glosses;
+        tags?: string[];
+        raw_tags?: string[];
+        form_of?: FormOf[];
+    }
+
+    type Glosses = string | string[];
+
+    /** Target of a sense's `form_of` entry. */
+    type FormOf = {
+        word?: string;
+    }
+
+    /**
+     * A recursive Map whose reserved '_tags' key holds a string array instead of a subtree.
+     * NOTE(review): Map's own `get`/`set` signatures come first in this intersection,
+     * so `get('_tags')` may still resolve to `GlossTree | undefined`; please confirm.
+     */
+    type GlossTree = Map<string, GlossTree> & {
+        get(key: '_tags'): string[] | undefined;
+        set(key: '_tags', value: string[]): GlossTree;
+    };
+
+    /** A KaikkiSense whose tags are always present, plus its glosses as an array. */
+    type TidySense = Omit<KaikkiSense, 'tags'> & {
+        tags: string[];
+        glossesArray: string[];
+    }
+
+    /** Lemma data indexed by word, then reading, then part of speech. */
+    type LemmaDict = {
+        [word: string]: {
+            [reading: string]: {
+                [pos: string]: LemmaInfo
+            }
+        }
+    }
+
+    type LemmaInfo = {
+        ipa: IpaInfo[],
+        senses: SenseInfo[],
+    }
+
+    type IpaInfo = {
+        ipa: string,
+        tags: string[],
+    }
+
+    type SenseInfo = {
+        glosses: YomitanGloss[],
+        tags: string[],
+    }
+
+    type YomitanGloss = string | StructuredGloss
+
+    type StructuredGloss = {
+        type: "structured-content",
+        content: string | StructuredContent[],
+    }
+
+    /**
+     * One node of a structured gloss. `data` is a key/value object (for example
+     * `{ listType: "li" }`), `style` is optional, and `content` is either text,
+     * a single child node, or a list mixing text and child nodes.
+     */
+    type StructuredContent = {
+        tag: string,
+        data: Record<string, string>,
+        style?: Record<string, string | number>,
+        content: string | StructuredContent | (string | StructuredContent)[],
+    }
+
+    type Lemma = string;
+    type Form = string;
+    type PoS = string;
+    /** Inflection descriptions indexed by lemma, then form, then part of speech. */
+    type FormsMap = Map<Lemma, Map<Form, Map<PoS, string[]>>>;
+    type AutomatedForms = Map<Lemma, Map<Form, Map<PoS, Set<string>|string[]>>>;
+
+    type NestedObject = {
+        [key: string]: NestedObject | any;
+    }
+}
+
+export {} // This is needed to make this file a module
\ No newline at end of file
diff --git a/util/util.js b/util/util.js
index 1f3aede..09f3499 100644
--- a/util/util.js
+++ b/util/util.js
@@ -1,3 +1,4 @@
+//@ts-nocheck
 const path = require('path');
 const { readFileSync, writeFileSync, existsSync } = require('fs');
 const date = require('date-and-time');
@@ -35,7 +36,10 @@ function sortTags(targetIso, tags) {
 }
 
 // sorts inflection entries to be nearby similar inflections
-
+/**
+ * @param {string[]} tags 
+ * @returns {string[]}
+ */
 function similarSort(tags) {
     return tags.sort((a, b) => {
         const aWords = a.split(' ');
@@ -62,6 +66,11 @@ function similarSort(tags) {
 // input: ['first-person singular present', 'third-person singular present']
 // output: ['first/third-person singular present']
 
+/**
+ * @param {string} targetIso 
+ * @param {string[]} tags 
+ * @returns {string[]}
+ */
 function mergePersonTags(targetIso, tags) {
     const persons = ["first-person", "second-person", "third-person"];
 
@@ -70,7 +79,9 @@ function mergePersonTags(targetIso, tags) {
             return items.sort((a, b) => persons.indexOf(a) - persons.indexOf(b));
         }
 
+        /** @type {string[]} */
         const result = [];
+        /** @type {Object<string, string[]>} */
         const mergeObj = {};
 
         for (const item of tags) {