Skip to content

Commit

Permalink
Adding German Lexicon lookup
Browse files Browse the repository at this point in the history
  • Loading branch information
hellpanderrr committed May 14, 2024
1 parent 8974c13 commit ac751b1
Show file tree
Hide file tree
Showing 8 changed files with 336 additions and 37 deletions.
56 changes: 55 additions & 1 deletion wiktionary_pron/css/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
src: url("../fonts/NotoSans-Regular.woff2") format("woff2");
}

@font-face {
font-family: "Noto Sans Semibold";
src: url("../fonts/NotoSans-SemiBold.woff2") format("woff2");
}
@font-face {
font-family: "EB Garamond";
src: url("../fonts/EBGaramond-Regular.woff2") format("woff2");
Expand All @@ -16,6 +20,11 @@ button:enabled:active {
outline: none;
}

.line {
position: relative;

}

hr {
opacity: 0.3;
}
Expand Down Expand Up @@ -53,7 +62,7 @@ div#form_bottom {
align-items: stretch;
}

div#clear_button_group{
div#clear_button_group {
display: flex;
flex-direction: column;
}
Expand Down Expand Up @@ -196,17 +205,62 @@ body i.fa-moon-o {
background-color: #f0f8ff00;
}

.ipa.multiple-values {
font-family: 'Noto Sans Semibold', sans-serif;
cursor: pointer;
}

/*
 * Hover tooltip for elements carrying an `all_values` attribute.
 * The ::after pseudo-element renders the attribute text in a bordered box
 * that is fully transparent until the element is hovered.
 */
[all_values]::after {
display: block;
position: absolute;
/* The tooltip text is taken directly from the element's attribute value. */
content: attr(all_values);
border: 1px solid black;
/* Fade in/out instead of toggling instantly. */
transition: opacity .3s ease;
background: white;
padding: .25em;
z-index: 10000;
font-weight: normal;
/* Keep line breaks contained in the attribute value as separate lines. */
white-space: pre;
/* Invisible by default; see the :hover rule below. */
opacity: 0;
/* The tooltip itself never receives mouse events. */
pointer-events: none;
border-radius: 5px;
box-shadow: 2px 2px 1px silver;

}

/* Positioning context for the absolutely positioned ::after tooltip. */
*[all_values] {
position: relative;
}

/* Reveal the tooltip while the host element is hovered. */
*[all_values]:hover::after {
opacity: 1;
z-index: 10000;
}


.show-popup {
visibility: visible;
transition: opacity 0.5s;
}

.audio-popup-line {

position: absolute;
top: 5%;
left: -5%;
transition: opacity 0.5s;
font-size: 15px;
background-color: #f0f8ff00;
opacity: 0;
visibility: hidden;
}

.cell {
position: relative;
display: inline-block;
margin-top: 5px;
margin-left: 5px;
margin-right: 5px;
float: left;
}

Expand Down
1 change: 1 addition & 0 deletions wiktionary_pron/scripts/jszip-utils.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions wiktionary_pron/scripts/jszip.min.js

Large diffs are not rendered by default.

47 changes: 47 additions & 0 deletions wiktionary_pron/scripts/lexicon.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { loadFileFromZip, splitAndAppend } from "./utils.js";

/**
 * Load the pronunciation lexicon for a language and turn it into a lookup map.
 *
 * The lexicon is read from `lexicon.txt` inside the language's zip archive
 * under `./utils/`. Each line is passed to `splitAndAppend(line, "\t", 1)`
 * (presumably splitting it into a word and an IPA column at the first tab —
 * confirm against utils.js). Entries whose IPA column contains "|" are
 * skipped, and spaces are removed from the remaining IPA strings.
 *
 * @param {string} language - Key of a supported language (currently only "German").
 * @returns {Promise<Map<string, string>>} Map from word to IPA transcription.
 * @throws {Error} If no lexicon archive is registered for `language`.
 */
async function loadLexicon(language) {
  const languages = {
    German: "german_lexicon.zip",
  };
  // Fail loudly instead of requesting the bogus path "./utils/undefined".
  if (!Object.prototype.hasOwnProperty.call(languages, language)) {
    throw new Error(`No lexicon available for language: ${language}`);
  }

  const lexiconFolder = "./utils/";
  const wordPairsList = await loadFileFromZip(
    lexiconFolder + languages[language],
    "lexicon.txt",
  );

  /**
   * Parse the raw lexicon text into a Map, yielding to the event loop between
   * chunks so that a large lexicon does not block the page while loading.
   */
  async function process_lexicon(text) {
    const rows = text.split(/\r?\n/);
    const pairs = [];
    const chunk = 10000;

    for (let start = 0; start < rows.length; start += chunk) {
      const end = Math.min(start + chunk, rows.length);
      for (let i = start; i < end; i++) {
        const [word, ipa] = splitAndAppend(rows[i], "\t", 1);
        // Lines without an IPA column (e.g. a trailing empty line) are ignored
        // rather than crashing on `ipa.includes`.
        if (typeof ipa !== "string") {
          continue;
        }
        if (!ipa.includes("|")) {
          pairs.push([word, ipa.split(" ").join("")]);
        }
      }
      if (end < rows.length) {
        // Give the browser a chance to run other tasks before the next chunk.
        await new Promise((resolve) => setTimeout(resolve, 1));
      }
    }

    // The Map constructor keeps the last value seen for a duplicate key, so
    // reversing makes the first occurrence in the file win.
    return new Map(pairs.reverse());
  }

  return process_lexicon(wordPairsList);
}

export { loadLexicon };
3 changes: 2 additions & 1 deletion wiktionary_pron/scripts/lua_init.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,5 @@ async function loadLanguage(code) {
window[code + "_ipa"] = lua.global.get(code);
// Set a JS function to be a global lua function
}
export { loadLanguage };

export { loadLanguage, updateLoadingText };
137 changes: 117 additions & 20 deletions wiktionary_pron/scripts/main.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import { loadLanguage } from "./lua_init.js";
import { loadLanguage, updateLoadingText } from "./lua_init.js";

import {
asyncMapStrict,
clearStorage,
createElementFromHTML,
get_ipa_no_cache,
macronize,
memoizeLocalStorage,
wait,
} from "./utils.js";
import { tts } from "./tts.js";
import { toPdf } from "./pdf_export.js";
import { loadLexicon } from "./lexicon.js";

document.querySelector("#lang").disabled = false;

Expand Down Expand Up @@ -60,18 +62,28 @@ async function transcribe(mode) {

async function processWord(word) {
console.log("processing", word);
const { status, value } = await getIpa(word, lang, langStyle, langForm);
let { status, value } = await getIpa(word, lang, langStyle, langForm);
let values = "";
if (lang === "German") {
[value, values] = processGermanIpa(value);
}

const div = document.createElement("div");
div.className = "cell";

const span = document.createElement("span");
const ttsButton = document.createElement("button");
ttsButton.className = "fa fa-volume-down audio-popup";
div.appendChild(ttsButton);
span.className = status === "error" ? "error" : "";
span.setAttribute("data-word", word);

span.appendChild(document.createTextNode(value + " "));
let spanHTML = "";
const spanClass = status === "error" ? "error" : "ipa";
if (values !== "") {
spanHTML = `<span class="${spanClass}" data-word="${word}" all_values="${values}">${value} </span>`;
} else {
spanHTML = `<span class="${spanClass}" data-word="${word}">${value} </span>`;
}

const span = createElementFromHTML(spanHTML);

div.appendChild(span);
if (word.includes("\n")) {
Expand All @@ -88,6 +100,19 @@ async function transcribe(mode) {
);
}

/**
 * Normalize a German transcription result for display.
 *
 * A result may hold several slash-delimited transcriptions separated by
 * "/," (e.g. "/a/, /b/"). The first one becomes the displayed value and all
 * of them are joined with the HTML entity "&#xa;" (a line feed once parsed
 * as markup) for use in the `all_values` attribute.
 *
 * @param {string} value - Raw transcription, e.g. "/a/" or "/a/, /b/".
 * @returns {[string, string]} `[displayValue, allValues]`; `allValues` is ""
 *   when there is only a single transcription.
 */
function processGermanIpa(value) {
  // Removes at most the first two slashes, i.e. the pair wrapping one transcription.
  const stripSlashes = (text) => text.replace("/", "").replace("/", "");

  if (!value.includes("/,")) {
    return [stripSlashes(value), ""];
  }

  // Trim each alternative: the separator is "/," so any whitespace following
  // the comma would otherwise stay glued to the next alternative and prevent
  // it from matching the trimmed text shown in the page.
  const alternatives = value
    .split("/,")
    .map((alternative) => stripSlashes(alternative).trim());

  return [stripSlashes(alternatives[0]), alternatives.join("&#xa;")];
}

async function processLine(line) {
if (line === "") {
return;
Expand All @@ -102,26 +127,45 @@ async function transcribe(mode) {
}),
);
window.x = results;
const formattedResults = results.map(({ ipa }) =>
result.status === "error"
? `<div class="error">${ipa.value} </div>`
: `<div class="ipa">${ipa.value} </div>`,
);

const formattedResults = results.map(({ ipa }) => {
let values;
let value;
value = ipa.value;
if (lang === "German") {
[value, values] = processGermanIpa(value);
} else {
values = "";
}

console.log(values);
return result.status === "error"
? `<div class="error">${value} </div>`
: Boolean(values)
? `<div class="ipa" all_values="${values}">${value} </div>`
: `<div class="ipa">${value} </div>`;
});

const newRow = resultDiv.insertRow(-1);
newRow.className = "line";
const formattedWords = words.map(
(word) => `<div class="input_text">${word} </div>`,
);
const combinedResults = formattedResults.map(
(formattedResult, index) =>
'<div class="cell"style="float:left;margin-left:5px;margin-top:5px;"><button class="fa fa-volume-down audio-popup"></button>' +
'<div class="cell""><button class="fa fa-volume-down audio-popup"></button>' +
formattedWords[index] +
formattedResult +
"</div>",
);
combinedResults
.reverse()
.map((r) => newRow.insertAdjacentHTML("afterbegin", r));

const ttsButton = document.createElement("button");
ttsButton.className = "fa fa-volume-down audio-popup-line";

newRow.prepend(ttsButton);
}

async function processColumn(line) {
Expand Down Expand Up @@ -191,13 +235,23 @@ async function transcribe(mode) {

const resultDiv = document.createElement("div");
resultDiv.className = "cell";
const resultSpan = document.createElement("span");
resultSpan.textContent = results[i].value;
resultSpan.setAttribute("data-word", words[i]);
resultSpan.classList.add(
results[i].status === "error" ? "error" : "ipa",
);
resultSpan.style.display = "inline-block";
let value, values;

if (lang === "German") {
[value, values] = processGermanIpa(results[i].value);
} else {
value = results[i].value;
values = "";
}

const spanClass = results[i].status === "error" ? "error" : "ipa";
let spanHTML = "";
if (values) {
spanHTML = `<span class="${spanClass}" style="display: inline-block" data-word="${words[i]}" all_values="${values}">${value} </span>`;
} else {
spanHTML = `<span class="${spanClass}" style="display: inline-block" data-word="${words[i]}">${value} </span>`;
}
const resultSpan = createElementFromHTML(spanHTML);
const ttsResultButton = document.createElement("button");
ttsResultButton.className = "fa fa-volume-down audio-popup";

Expand Down Expand Up @@ -238,6 +292,44 @@ async function transcribe(mode) {
console.log(err);
} finally {
console.log("finally");
if (lang === "German") {
  // Return the entry following `current`, wrapping to the first entry after
  // the last one (or when `current` is not found in the list).
  const nextAlternative = (alternatives, current) => {
    const index = alternatives.indexOf(current.trim());
    return alternatives[(index + 1) % alternatives.length];
  };

  // German results may carry several transcriptions in the `all_values`
  // attribute, one per line. Mark such elements and let a click cycle the
  // displayed text through the alternatives.
  // NOTE(review): listeners are attached on every run to every `.ipa` element
  // in the document; if elements from an earlier run are still present they
  // would get a second listener — confirm results are rebuilt each time.
  document.querySelectorAll(".ipa").forEach((element) => {
    const allValues = element.getAttribute("all_values");
    if (allValues && allValues.trim() !== "") {
      element.classList.add("multiple-values");
    }

    element.addEventListener("click", (event) => {
      // currentTarget is the element the listener is bound to, i.e. the one
      // that actually carries the attribute.
      const target = event.currentTarget;

      // getAttribute returns null when the attribute is absent; treat that
      // like an empty list instead of failing on `null.split`.
      const alternatives = (target.getAttribute("all_values") || "")
        .split("\n")
        .map((alternative) => alternative.trim())
        .filter((alternative) => alternative !== "");

      // Nothing to cycle through: leave the displayed text untouched.
      if (alternatives.length < 2) {
        return;
      }

      target.textContent = nextAlternative(alternatives, target.textContent);
    });
  });
}

globalThis.transcriptionMode = mode;
globalThis.transcriptionLang = lang;
enableAll([document.querySelector("#export_pdf")]);
Expand Down Expand Up @@ -317,7 +409,7 @@ const languages = {
},
German: {
styles: ["Default"],
forms: ["Phonetic", "Phonemic"],
forms: ["Phonemic", "Phonetic"],
langCode: "de",
ttsCode: "de-DE",
},
Expand Down Expand Up @@ -410,6 +502,11 @@ async function updateOptionsUponLanguageSelection(event) {
if (!(selectedLanguage in loadedLanguages)) {
disableAll();
await loadLanguage(lang.langCode);
if (selectedLanguage === "German") {
updateLoadingText("German lexicon", "");
globalThis.lexicon = await loadLexicon("German");
updateLoadingText("", "");
}
enableAll();
loadedLanguages[selectedLanguage] = true;
}
Expand Down
Loading

0 comments on commit ac751b1

Please sign in to comment.