Skip to content

Commit

Permalink
Moving lexicon loading to a web worker.
Browse files Browse the repository at this point in the history
  • Loading branch information
hellpanderrr committed May 15, 2024
1 parent 3ce1194 commit db31d95
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 34 deletions.
34 changes: 8 additions & 26 deletions wiktionary_pron/scripts/lexicon.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { loadFileFromZip, splitAndAppend } from "./utils.js";
import { loadFileFromZip } from "./utils.js";

async function loadLexicon(language) {
const languages = {
Expand All @@ -11,33 +11,15 @@ async function loadLexicon(language) {
);

/**
 * Parses raw lexicon text off the main thread.
 *
 * Spawns the lexicon loader worker, sends it `text`, and settles with the
 * worker's reply. The worker is terminated as soon as it has answered or
 * failed so that it does not linger after the one-shot job.
 *
 * @param {string} text - Raw lexicon file contents.
 * @returns {Promise<*>} Resolves with the data posted back by the worker;
 *   rejects with an Error if the worker reports an error.
 */
function process_lexicon(text) {
  return new Promise((resolve, reject) => {
    const worker = new Worker("scripts/lexicon_loader_worker.js");

    worker.onmessage = function (e) {
      worker.terminate();
      resolve(e.data);
    };

    // Without an error handler a failing worker (script missing, exception
    // while parsing) would leave this promise pending forever.
    worker.onerror = function (e) {
      worker.terminate();
      reject(new Error(`Lexicon worker failed: ${e.message}`));
    };

    worker.postMessage(text);
  });
}

const lexicon = await process_lexicon(wordPairsList);
Expand Down
16 changes: 16 additions & 0 deletions wiktionary_pron/scripts/lexicon_loader_worker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
 * Splits `str` around the first occurrence of `delim`.
 *
 * @param {string} str - Line to split.
 * @param {string} delim - Separator to look for.
 * @returns {[string, string]} The text before the separator and the text
 *   after it; when the separator is absent, the whole line and "".
 */
function splitAtFirst(str, delim) {
  const index = str.indexOf(delim);
  if (index === -1) {
    // Without this guard slice(0, -1) would chop the last character off the
    // key and the entire line would be returned as the value.
    return [str, ""];
  }
  return [str.slice(0, index), str.slice(index + delim.length)];
}

/**
 * Builds the lexicon Map from tab-separated text.
 *
 * Each line contributes one entry: the part before the first tab is the key,
 * the remainder with all spaces removed is the value.
 *
 * NOTE(review): the pre-worker implementation skipped entries whose IPA
 * contained "|"; this loader keeps them - confirm that is intended.
 *
 * @param {string} text - Lexicon contents, one entry per line.
 * @returns {Map<string, string>} Lookup table from the first column to the
 *   space-stripped remainder of the line.
 */
function buildLexicon(text) {
  const rows = text.split(/\r?\n/);
  const dict = new Map();

  // Insert from the last line to the first so that, for duplicate keys, the
  // earliest line in the file is written last and therefore wins.
  for (let i = rows.length - 1; i >= 0; i--) {
    const [word, ipa] = splitAtFirst(rows[i], "\t");
    dict.set(word, ipa.split(" ").join(""));
  }

  return dict;
}

// Inside a dedicated worker `globalThis` is the worker global scope (the
// same object as `self`), so this installs the worker's message handler.
globalThis.onmessage = function (e) {
  globalThis.postMessage(buildLexicon(e.data));
};
10 changes: 2 additions & 8 deletions wiktionary_pron/scripts/utils.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
/**
 * Splits `str` on `delim`, keeping the first `count` pieces separate and
 * gluing everything after them back together with `delim`.
 *
 * @param {string} str - Text to split.
 * @param {string} delim - Separator.
 * @param {number} count - Number of leading pieces to keep as-is.
 * @returns {string[]} The leading pieces followed by the re-joined remainder.
 */
const splitAndAppend = (str, delim, count) => {
  const remainder = str.split(delim);
  // splice removes the leading pieces from `remainder` and hands them back.
  const leading = remainder.splice(0, count);
  leading.push(remainder.join(delim));
  return leading;
};

async function asyncMapStrict(arr, fn) {
const result = [];
// console.time("Elapsed time :");
Expand Down Expand Up @@ -164,11 +159,11 @@ function get_ipa_no_cache(text, args) {
case "German":
if (langForm === "Phonemic") {
let dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
);
if (!dictRecord) {
dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
);
}
console.log(cleanText, dictRecord);
Expand Down Expand Up @@ -330,7 +325,6 @@ export {
memoizeLocalStorage,
macronize,
loadJs,
splitAndAppend,
loadFileFromZip,
createElementFromHTML,
};

0 comments on commit db31d95

Please sign in to comment.