Skip to content

Commit

Permalink
Adding french lexicon
Browse files Browse the repository at this point in the history
  • Loading branch information
hellpanderrr committed Jul 1, 2024
1 parent 712b179 commit 6318e90
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 35 deletions.
1 change: 1 addition & 0 deletions wiktionary_pron/scripts/lexicon.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ async function loadLexicon(language) {
const languages = {
German: "german_lexicon.zip",
Czech: "czech_lexicon.zip",
French: "french_lexicon.zip",
};
const lexiconFolder = "./utils/";

Expand Down
24 changes: 19 additions & 5 deletions wiktionary_pron/scripts/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@ import {
asyncMapStrict,
clearStorage,
createElementFromHTML,
disableAll,
enableAll,
get_ipa_no_cache,
memoizeLocalStorage,
wait,
enableAll,
disableAll,
} from "./utils.js";
import { tts } from "./tts.js";
import { toPdf } from "./pdf_export.js";
import { loadLexicon } from "./lexicon.js";
import { macronize } from "./macronizer.js";

document.querySelector("#lang").disabled = false;

async function prepareTranscribe(lang) {
Expand Down Expand Up @@ -492,31 +493,44 @@ async function updateOptionsUponLanguageSelection(event) {
const selectedLanguageElement = event.target;
const selectedLanguage = selectedLanguageElement.value;
const lang = languages[selectedLanguage];
const urlParams = new URLSearchParams(window.location.search);
let useDictionary = urlParams.get("dict");
if (useDictionary === null) {
useDictionary = "true";
}

try {
window.history.pushState({}, "", `?lang=${selectedLanguage}`);
if (urlParams.get("lang") !== selectedLanguage) {
window.history.pushState({}, "", `?lang=${selectedLanguage}`);
}
} catch (err) {
console.log(err);
}
if (!(selectedLanguage in loadedLanguages)) {
disableAll();
await loadLanguage(lang.langCode);
globalThis.lexicon = null;
if (selectedLanguage === "Latin") {
updateLoadingText("Macrons list", "");
await macronize("");
updateLoadingText("", "");
}

if (selectedLanguage === "German") {
if (selectedLanguage === "German" && useDictionary === "true") {
updateLoadingText("German lexicon", "");
globalThis.lexicon = await loadLexicon("German");
updateLoadingText("", "");
}
if (selectedLanguage === "Czech") {
if (selectedLanguage === "Czech" && useDictionary === "true") {
updateLoadingText("Czech lexicon", "");
globalThis.lexicon = await loadLexicon("Czech");
updateLoadingText("", "");
}
if (selectedLanguage === "French" && useDictionary === "true") {
updateLoadingText("French lexicon", "");
globalThis.lexicon = await loadLexicon("French");
updateLoadingText("", "");
}

enableAll();
loadedLanguages[selectedLanguage] = true;
Expand Down
77 changes: 47 additions & 30 deletions wiktionary_pron/scripts/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@ async function asyncMapStrict(arr, fn) {

/**
 * Reduces a piece of text to the characters that matter for a word lookup.
 *
 * Keeps Unicode letters (`\p{L}`), combining marks (`\p{M}`), hyphens and
 * apostrophes; every other character (digits, punctuation, whitespace,
 * symbols) is removed. The typographic apostrophe (U+2019) is kept by the
 * filter and then rewritten to the ASCII apostrophe, so "l’homme" and
 * "l'homme" sanitize to the same string. The result is returned in NFKC
 * normalization form.
 *
 * @param {string} text - Raw input text.
 * @returns {string} The filtered, apostrophe-normalized, NFKC-normalized text.
 */
function sanitize(text) {
  return (
    text
      // A single whitelist pass. U+2019 must be allowed here: a stricter
      // filter that drops it would make the replaceAll below dead code and
      // silently turn "l’homme" into "lhomme".
      .replace(/[^\p{L}\p{M}'’-]/gu, "")
      .replaceAll("’", "'")
      .normalize("NFKC")
  );
}

Expand Down Expand Up @@ -85,8 +83,8 @@ function clearStorage() {
}

function get_ipa_no_cache(text, args) {
console.log("doing actual IPA", text, args);
const cleanText = sanitize(text);
console.log("doing actual IPA", text, cleanText, args);

const [lang, langStyle, langForm] = args.split(";");
let command = "";
Expand Down Expand Up @@ -116,18 +114,20 @@ function get_ipa_no_cache(text, args) {
break;
case "German":
if (langForm === "Phonemic") {
let dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
);
if (!dictRecord) {
dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
if (globalThis.lexicon) {
let dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
);
}
console.log(cleanText, dictRecord);
if (dictRecord) {
command = 'ipa="' + dictRecord + '";';
break;
if (!dictRecord) {
dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
);
}
console.log(cleanText, dictRecord);
if (dictRecord) {
command = 'ipa="' + dictRecord + '";';
break;
}
}
}
command =
Expand All @@ -150,6 +150,21 @@ function get_ipa_no_cache(text, args) {
break;
case "French":
if (langForm === "Phonemic") {
if (globalThis.lexicon) {
let dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
);
if (!dictRecord) {
dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
);
}
console.log(cleanText, dictRecord);
if (dictRecord) {
command = 'ipa="' + dictRecord + '";';
break;
}
}
command = `(window.fr_ipa.show("${cleanText}")[0])`;
}

Expand All @@ -166,20 +181,21 @@ function get_ipa_no_cache(text, args) {
break;
case "Czech":
if (langForm === "Phonemic") {
let dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
);
if (!dictRecord) {
dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
if (globalThis.lexicon) {
let dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
);
if (!dictRecord) {
dictRecord = globalThis.lexicon.get(
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
);
}
console.log(cleanText, dictRecord);
if (dictRecord) {
command = 'ipa="' + dictRecord + '";';
break;
}
}
console.log(cleanText, dictRecord);
if (dictRecord) {
command = 'ipa="' + dictRecord + '";';
break;
}

command = `(window.cs_ipa.toIPA("${cleanText}"))`;
}
break;
Expand Down Expand Up @@ -227,10 +243,11 @@ function get_ipa_no_cache(text, args) {
if (!ipa) {
return { value: text, status: "error" };
}

console.log("before replace ipa ", ipa);
if (langStyle === "Parisian (experimental)") {
ipa = ipa
.replace("ɔ̃̃̃̃̃", "õ")
.replace("ɔ̃̃̃̃̃̃", "õ")
.replace("ɔ̃", "õ")
.replace("ɑ̃", "ɔ̃")
.replace("œ̃", "ɑ̃")
.replace("ɛ̃", "ɑ̃");
Expand Down
Binary file added wiktionary_pron/utils/french_lexicon.zip
Binary file not shown.

0 comments on commit 6318e90

Please sign in to comment.