Skip to content

Commit

Permalink
branding
Browse files: browse the repository at this point in the history
  • Loading branch information
StefanVukovic99 committed Jan 20, 2024
1 parent 1cd315d commit 4b2075a
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 62 deletions.
2 changes: 0 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
MAX_SENTENCES=5000000
DEBUG_WORD=sehen
OPENSUBS_PATH=/path/to/opensubs
DICT_NAME=abc
16 changes: 0 additions & 16 deletions 1-create-folders.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,3 @@
/*
* Copyright (C) 2023 Yezichak Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
const {mkdirSync} = require('fs');

const folders = [
Expand Down
41 changes: 12 additions & 29 deletions 3-make-yomitan.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,3 @@
/*
* Copyright (C) 2023 Yezichak Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

const {readFileSync, writeFileSync, existsSync, readdirSync, mkdirSync, createWriteStream, unlinkSync} = require('fs');
const date = require('date-and-time');
const now = new Date();
Expand Down Expand Up @@ -94,14 +77,14 @@ function findModifiedTag(tag){
return modifiedTag;
}

const yzk = {
const ymt = {
lemma: [],
form: [],
ipa: [],
dict: []
};

const yzkTags = {
const ymtTags = {
ipa: {},
dict: {}
};
Expand Down Expand Up @@ -181,13 +164,13 @@ for (const [lemma, infoMap] of Object.entries(lemmaDict)) {

if (fullTag) {
recognizedTags.push(fullTag[0]);
yzkTags.dict[tag] = fullTag;
ymtTags.dict[tag] = fullTag;
return null;
} else {
const modifiedTag = findModifiedTag(tag);
if (modifiedTag){
recognizedTags.push(modifiedTag[0]);
yzkTags.dict[tag] = modifiedTag;
ymtTags.dict[tag] = modifiedTag;
} else {
if (allEntryTags.some((otherTag) => otherTag !== tag && otherTag.includes(tag))) return null;
incrementCounter(tag, skippedTermTags)
Expand All @@ -208,7 +191,7 @@ for (const [lemma, infoMap] of Object.entries(lemmaDict)) {

debug(entries);
for (const [tags, entry] of Object.entries(entries)) {
yzk.lemma.push(entry);
ymt.lemma.push(entry);
}
}

Expand All @@ -218,7 +201,7 @@ for (const [lemma, infoMap] of Object.entries(lemmaDict)) {
.map((tag) => {
const fullTag = findTag(ipaTags, tag);
if (fullTag){
yzkTags.ipa[tag] = fullTag;
ymtTags.ipa[tag] = fullTag;
return fullTag[0];
} else {
incrementCounter(tag, skippedIpaTags)
Expand All @@ -237,7 +220,7 @@ for (const [lemma, infoMap] of Object.entries(lemmaDict)) {
}, []);

if (mergedIpas.length) {
yzk.ipa.push([
ymt.ipa.push([
normalizedLemma,
'ipa',
{
Expand Down Expand Up @@ -322,7 +305,7 @@ for (const [form, allInfo] of Object.entries(formDict)) {
]);

if(deinflectionDefinitions.length){
yzk.form.push([
ymt.form.push([
normalizeOrthography(form),
'',
'non-lemma',
Expand All @@ -337,13 +320,13 @@ for (const [form, allInfo] of Object.entries(formDict)) {
}
}

yzk.dict = [...yzk.lemma, ...yzk.form];
ymt.dict = [...ymt.lemma, ...ymt.form];

const tempPath = 'data/temp';

const indexJson = {
format: 3,
revision: currentDate,
revision: 'ymt-' + currentDate,
sequenced: true
};

Expand All @@ -360,11 +343,11 @@ for (const folder of folders) {
title: `${DICT_NAME}W-${source_iso}-${target_iso}` + (folder === 'dict' ? '' : '-ipa'),
}));

writeFileSync(`${tempPath}/${folder}/tag_bank_1.json`, JSON.stringify(Object.values(yzkTags[folder])));
writeFileSync(`${tempPath}/${folder}/tag_bank_1.json`, JSON.stringify(Object.values(ymtTags[folder])));

const filename = folder === 'dict' ? 'term_bank_' : 'term_meta_bank_';

writeInBatches(yzk[folder], `${folder}/${filename}`, 25000);
writeInBatches(ymt[folder], `${folder}/${filename}`, 25000);
}

console.log('total ipas', ipaCount, 'skipped ipa tags', Object.values(skippedIpaTags).reduce((a, b) => a + b, 0));
Expand Down
22 changes: 10 additions & 12 deletions auto.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

source .env

export MAX_SENTENCES
export DEBUG_WORD
export OPENSUBS_PATH
export DICT_NAME

# Check for the source_language and target_language arguments
Expand All @@ -18,7 +16,7 @@ source_all=false
target_all=false
redownload=false
force_tidy=false
force_yez=false
force_ymt=false
force=false

flags=('S' 'T' 'd' 't' 'y' 'F')
Expand All @@ -30,7 +28,7 @@ for flag in "${flags[@]}"; do
'T') target_all=true ;;
'd') redownload=true ;;
't') force_tidy=true ;;
'y') force_yez=true ;;
'y') force_ymt=true ;;
'F') force=true ;;
esac
;;
Expand All @@ -39,19 +37,19 @@ done

if [ "$force" = true ]; then
force_tidy=true
force_yez=true
force_ymt=true
fi

if [ "$force_tidy" = true ]; then
force_yez=true
force_ymt=true
fi

echo "[S] source_all: $source_all"
echo "[T] target_all: $target_all"
echo "[d] redownload: $redownload"
echo "[F] force: $force"
echo "[t] force_tidy: $force_tidy"
echo "[y] force_yez: $force_yez"
echo "[y] force_ymt: $force_ymt"

# Step 1: Install dependencies
npm i
Expand Down Expand Up @@ -154,20 +152,20 @@ for entry in "${entries[@]}"; do
dict_file="${DICT_NAME}W-$source_iso-$target_iso.zip"
ipa_file="${DICT_NAME}W-$source_iso-$target_iso-ipa.zip"

# Step 5: Create Yezichak files
# Step 5: Create Yomitan files
if \
[ ! -f "data/language/$source_iso/$target_iso/$dict_file" ] || \
[ ! -f "data/language/$source_iso/$target_iso/$ipa_file" ] || \
[ "$force_yez" = true ]; then
echo "Creating Yezichak dict and IPA files"
[ "$force_ymt" = true ]; then
echo "Creating Yomitan dict and IPA files"
if node --max-old-space-size=8192 3-make-yomitan.js; then
zip -j "$dict_file" data/temp/dict/index.json data/temp/dict/tag_bank_1.json data/temp/dict/term_bank_*.json
zip -j "$ipa_file" data/temp/ipa/index.json data/temp/ipa/tag_bank_1.json data/temp/ipa/term_meta_bank_*.json
else
echo "Error: Yezichak generation script failed."
echo "Error: Yomitan generation script failed."
fi
else
echo "Yezichak dict already exists. Skipping Yezichak creation."
echo "Yomitan dict already exists. Skipping Yomitan creation."
fi

if [ -f "$dict_file" ]; then
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "kaikki-to-yezichak",
"name": "kaikki-to-yomitan",
"version": "1.0.0",
"main": "create-folders.js",
"scripts": {
Expand All @@ -12,7 +12,7 @@
"date-and-time": "^2.4.2",
"line-by-line": "^0.1.6"
},
"description": "Converts Kaikki JSON to Yezichak compatible dictionary.",
"description": "Converts Kaikki JSON to Yomitan compatible dictionary.",
"devDependencies": {
"jest": "^29.7.0"
}
Expand Down
2 changes: 1 addition & 1 deletion util/kaikki-breakdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
# Add labels and title
plt.xlabel("Source Language (headwords in this language)", fontsize=8)
plt.ylabel("Target Language (glosses in this language)", fontsize=8)
plt.title("yzkW", fontsize=12)
plt.title("ymtW", fontsize=12)

# Save the plot with a higher resolution
plt.savefig("heatmap.png", dpi=300)

0 comments on commit 4b2075a

Please sign in to comment.