Skip to content

Commit

Permalink
reorganizing
Browse files Browse the repository at this point in the history
  • Loading branch information
StefanVukovic99 committed Jan 20, 2024
1 parent 1d16219 commit 87660d7
Show file tree
Hide file tree
Showing 10 changed files with 47 additions and 905 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
*.json
!tag_bank_term.json
!tag_bank_ipa.json
!languages.json
!package.json

*.zip
Expand Down
6 changes: 2 additions & 4 deletions 1-create-folders.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,14 @@
const {mkdirSync} = require('fs');

const folders = [
'freq',
'freq/metadata',
'kaikki',
'sentences',
'tidy',
'language',
'temp',
'temp/dict',
'temp/freq',
'temp/ipa'
'temp/ipa',
'test'
];

for (const folder of folders) {
Expand Down
12 changes: 6 additions & 6 deletions 5-make-yezichak.js → 3-make-yomitan.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ const {source_iso, target_iso, DEBUG_WORD, DICT_NAME} = process.env;

const currentDate = date.format(now, 'YYYY.MM.DD');

console.log(`5-make-yezichak.js: reading lemmas`);
console.log(`3-make-yomitan.js: reading lemmas`);
const lemmaDict = JSON.parse(readFileSync(`data/tidy/${source_iso}-${target_iso}-lemmas.json`));
console.log(`5-make-yezichak.js: reading forms`);
console.log(`3-make-yomitan.js: reading forms`);
const formDict = JSON.parse(readFileSync(`data/tidy/${source_iso}-${target_iso}-forms.json`));

if (!existsSync(`data/language/${source_iso}/${target_iso}`)) {
Expand Down Expand Up @@ -112,7 +112,7 @@ const skippedTermTags = {};
let ipaCount = 0;
let termTagCount = 0;

console.log('5-make-yezichak.js: processing lemmas...');
console.log('3-make-yomitan.js: processing lemmas...');
for (const [lemma, infoMap] of Object.entries(lemmaDict)) {
normalizedLemma = normalizeOrthography(lemma);

Expand Down Expand Up @@ -258,7 +258,7 @@ const multiwordInflections = [
'female equivalent', // de
];

console.log('5-make-yezichak.js: Processing forms...');
console.log('3-make-yomitan.js: Processing forms...');
for (const [form, allInfo] of Object.entries(formDict)) {
for (const [lemma, info] of Object.entries(allInfo)) {
for (const [pos, glosses] of Object.entries(info)) {
Expand Down Expand Up @@ -349,7 +349,7 @@ const indexJson = {
const folders = ['dict', 'ipa'];

for (const folder of folders) {
console.log(`5-make-yezichak.js: Writing ${folder}...`);
console.log(`3-make-yomitan.js: Writing ${folder}...`);
for (const file of readdirSync(`${tempPath}/${folder}`)) {
if (file.includes('term_')) { unlinkSync(`${tempPath}/${folder}/${file}`); }
}
Expand All @@ -372,7 +372,7 @@ writeFileSync(`data/language/${source_iso}/${target_iso}/skippedIpaTags.json`, J
console.log('total term tags', termTagCount, 'skipped term tags', Object.values(skippedTermTags).reduce((a, b) => a + (parseInt(b) || 0), 0));
writeFileSync(`data/language/${source_iso}/${target_iso}/skippedTermTags.json`, JSON.stringify(sortBreakdown(skippedTermTags), null, 2));

console.log('5-make-yezichak.js: Done!');
console.log('3-make-yomitan.js: Done!');

function writeInBatches(inputArray, filenamePrefix, batchSize = 100000) {
console.log(`Writing ${inputArray.length.toLocaleString()} entries...`);
Expand Down
Loading

0 comments on commit 87660d7

Please sign in to comment.