Skip to content

Commit

Permalink
latest lemmata, remove console.log etc
Browse files Browse the repository at this point in the history
  • Loading branch information
rsdc2 committed Feb 22, 2024
1 parent 45ab3c9 commit 26c02c4
Show file tree
Hide file tree
Showing 9 changed files with 335 additions and 169 deletions.
360 changes: 254 additions & 106 deletions src/Pure/constants/lemmataGreek.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/Pure/constants/lemmataLatin.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ export const lemmataLatin = {
"Cabaria":"Cabaria",
"Caecinia":"Caecinia",
"Caepario":"Caeparius",
"Caesari":"Caesaris",
"Caesari":"Caesar",
"Cai":"Cai",
"Caia":"Gaia",
"Caio":"Caius",
Expand Down
29 changes: 23 additions & 6 deletions src/Pure/lemmatise.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@

import { lemmataLatin } from "../Pure/constants/lemmataLatin.js"
import { lemmataGreek } from "../Pure/constants/lemmataGreek.js"
import { comparison } from "../Pure/stringedit.js";
import { editDistance } from "../Pure/stringedit.js";

const latinForms = Object.keys(lemmataLatin)
const greekForms = Object.keys(lemmataGreek)
Expand All @@ -25,37 +24,55 @@ export const lemmatise = (lang) =>
if (latinForms.includes(form)) {
return lemmataLatin[form]
} else {

if (form.length < 4) {
return null
}
const editDists = latinForms.map(
/**
*
* @param {string} latinForm
* @returns {[string, number]}
*/
latinForm => [latinForm, comparison([form, latinForm])]
latinForm => [latinForm, editDistance([form, latinForm])]
)

const sorted = editDists.sort( ( [form1, dist1], [form2, dist2]) => dist1 - dist2 )
const [closestForm, dist] = sorted[0]
return lemmataLatin[closestForm]

if (dist <= 1) {
return lemmataLatin[closestForm]
}

return null

}


} else if (lang === "grc") {
if (greekForms.includes(form)) {
return lemmataGreek[form]
} else {
if (form.length < 4) {
return
}
const editDists = greekForms.map(
/**
*
* @param {string} greekForm
* @returns {[string, number]}
*/
greekForm => [greekForm, comparison([form, greekForm])]
greekForm => [greekForm, editDistance([form, greekForm])]
)

const sorted = editDists.sort( ( [form1, dist1], [form2, dist2]) => dist1 - dist2 )
const [closestForm, dist] = sorted[0]
return lemmataGreek[closestForm]

if (dist <= 1) {
return lemmataGreek[closestForm]
}

return null
}
}
}
80 changes: 40 additions & 40 deletions src/Pure/stringedit.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,48 +31,48 @@ function sameLength(s1, s2) {
* @returns {number}
*/

export function editDistance([s1, s2]) {
    // Purpose: return the number of single-character insertions, deletions
    // and substitutions needed to turn s1 into s2.
    //
    // The earlier draft built its table with `new Array(n).map(...)`; `map`
    // skips the holes of a freshly sized array, so no rows were ever created
    // and the first `dist[i][0] = i` threw. It also had no `return` on this
    // path. The table is now built row by row and the result is returned.

    // Identical strings need no edits
    if (s1 === s2) {
        return 0
    }

    // If one side is empty, every character of the other side is an insertion
    if (s1.length === 0) {
        return s2.length
    }

    if (s2.length === 0) {
        return s1.length
    }

    // Equal-length inputs keep using the file's existing helper, as before
    if (s1.length === s2.length) {
        return sameLength(s1, s2)
    }

    // previousRow[j] holds the distance between the first i-1 characters
    // of s1 and the first j characters of s2; row 0 is simply 0..s2.length
    let previousRow = Array.from({ length: s2.length + 1 }, (_, j) => j)

    for (let i = 1; i <= s1.length; i++) {
        // Column 0: deleting all i characters of the s1 prefix
        const currentRow = [i]

        for (let j = 1; j <= s2.length; j++) {
            const substitutionCost = s1[i - 1] === s2[j - 1] ? 0 : 1

            currentRow[j] = Math.min(
                previousRow[j] + 1,                    // delete from s1
                currentRow[j - 1] + 1,                 // insert into s1
                previousRow[j - 1] + substitutionCost  // substitute or match
            )
        }

        previousRow = currentRow
    }

    return previousRow[s2.length]
}


/**
Expand Down Expand Up @@ -125,7 +125,7 @@ export function findClosestZero(arr2d, [startX, startY]) {
*
* @param {[string, string]} param0
*/
export function comparison([s1, s2]) {
export function editDistance([s1, s2]) {

if (s1 === s2) {
return 0
Expand Down Expand Up @@ -157,7 +157,7 @@ export function comparison([s1, s2]) {

const arr = Arr.arr2d([s1Len, s2Len], 0)

console.log(arr)
// console.log(arr)

for (let i=0; i<s1Len; i++) {
for (let j=0; j<s2Len; j++) {
Expand All @@ -177,9 +177,9 @@ export function comparison([s1, s2]) {
let i = 0, j = 0;
let cost = 0;

console.log("x: ", s2Len, ", y: ", s1Len)
console.log(dists)
console.log(i, j, cost)
// console.log("x: ", s2Len, ", y: ", s1Len)
// console.log(dists)
// console.log(i, j, cost)

while (i < s1Len && j < s2Len) {

Expand All @@ -204,7 +204,7 @@ export function comparison([s1, s2]) {
}

i = i + 1
console.log(i, j, cost, r, b, d)
// console.log(i, j, cost, r, b, d)
continue
}

Expand All @@ -216,14 +216,14 @@ export function comparison([s1, s2]) {
}

j = j + 1
console.log(i, j, cost, r, b, d)
// console.log(i, j, cost, r, b, d)
continue
}

if (d === 0) {
i = i + 1
j = j + 1
console.log(i, j, cost, r, b, d)
// console.log(i, j, cost, r, b, d)
continue
}

Expand All @@ -236,19 +236,19 @@ export function comparison([s1, s2]) {
if (d != null) {
i = i + 1
j = j + 1
console.log(i, j, cost, r, b, d)
// console.log(i, j, cost, r, b, d)
continue
}

if (b == null) {
i = i + 1
console.log(i, j, cost, r, b, d)
// console.log(i, j, cost, r, b, d)
continue
}

if (r == null) {
j = j + 1
console.log(i, j, cost, r, b, d)
// console.log(i, j, cost, r, b, d)
continue
}

Expand Down
15 changes: 9 additions & 6 deletions src/SideEffects/epidoc/textElem.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import Compress from "../../Pure/compress.js";
import Convert from "../../Pure/convert.js";
import Validator from "../../Pure/validator.js";
import "../../Types/typedefs.js"
import { lemmataLatin } from "../../Pure/constants/lemmataLatin.js"
import { lemmataGreek } from "../../Pure/constants/lemmataGreek.js"
import { ISicElementIDError } from "../../Errors/isicElementIDError.js";
import { lemmatise } from "../../Pure/lemmatise.js";

Expand Down Expand Up @@ -96,10 +94,15 @@ export default class TextElem extends EpiDocElem {
* or <div type="edition">
*/
lemmatise() {
this.elem.setAttribute(
"lemma",
lemmatise(this.xmlLang)(this.form)
)
const lemma = lemmatise(this.xmlLang)(this.form)

if (lemma != null) {
this.elem.setAttribute(
"lemma",
lemmatise(this.xmlLang)(this.form)
)
}

}

removeXMLID() {
Expand Down
2 changes: 1 addition & 1 deletion tests/Pure/lemmatise.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import Parametrized from "../utils/parametrized.mjs"
const { parametrize } = Parametrized
import { lemmatise } from "../../src/Pure/lemmatise.js"

import { comparison } from "../../src/Pure/stringedit.js"
import { editDistance } from "../../src/Pure/stringedit.js"

const lemmatiseLatinTests = /** @type {[string, string, string][]}*/ (
[
Expand Down
4 changes: 2 additions & 2 deletions tests/Pure/stringedit.test.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

import Parametrized from "../utils/parametrized.mjs"
import { comparison, findClosestZero } from "../../src/Pure/stringedit.js"
import { editDistance, findClosestZero } from "../../src/Pure/stringedit.js"
import { test } from "node:test"
import assert from "node:assert/strict"

Expand All @@ -19,7 +19,7 @@ const stringEditTests = /** @type {[[string, string], number, string][]}*/ (
// [["abczzzabc", "abcabc"], 3, "abczzzabc -> abcabc"]
])

parametrize(stringEditTests, comparison)
parametrize(stringEditTests, editDistance)


// const findNearestZeroTests = /** @type {[number[][], [number, number], string][]}*/ ([
Expand Down
6 changes: 3 additions & 3 deletions tests/utils/file.harness.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ function loadFile() {
const epidoc = loadEpiDoc(
"../files/input/ISic000001_valid.xml"
)
console.log(epidoc.processingInstructions)
console.log(epidoc.XMLDeclaration)
// console.log(epidoc.processingInstructions)
// console.log(epidoc.XMLDeclaration)
epidoc.createXMLDeclaration({throwOnFail: true})

// epidoc.doc.insertBefore(new Text("\n"), epidoc.doc.firstChild)
// console.log(epidoc.processingInstructions.map(item => item.data))
console.log(epidoc.XMLDeclaration.data)
// console.log(epidoc.XMLDeclaration.data)
// console.log(cwd())
}

Expand Down
6 changes: 2 additions & 4 deletions tests/utils/lemmata.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import { writeFileSync } from "node:fs"

import { lemmataLatin } from "../../src/Pure/constants/lemmataLatin.js"
import {sort, prettyPrint, formatAsVariable, JSONToObj} from "./object.mjs"


Expand Down Expand Up @@ -29,6 +27,6 @@ function writeLemmataFromJSONToJS(src, varName, dst) {
writeLemmataFromVarToJS(lemmata, varName, dst)
}

writeLemmataFromJSONToJS("greek_lemmata", "greek_lemmata", "greek_lemmata")

writeLemmataFromJSONToJS("greek_lemmata", "lemmataGreek", "lemmataGreek")
// writeLemmataFromJSONToJS("latin_lemmata", "lemmataLatin", "lemmataLatin")
// writeLemmataFromJSONToJS("latin_lemmata", "latin_lemmata", "latin_lemmata")

0 comments on commit 26c02c4

Please sign in to comment.