Skip to content

Commit

Permalink
Take the dialect into account when searching
Browse files Browse the repository at this point in the history
  • Loading branch information
Willem3141 committed May 28, 2024
1 parent 73300c2 commit f1414b8
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 90 deletions.
2 changes: 1 addition & 1 deletion fraporu/ayvefya/reykunyu.js
Original file line number Diff line number Diff line change
Expand Up @@ -1417,7 +1417,7 @@ function sngäiTìfwusew(initial) {

function doSearchNavi() {
let tìpawm = $('#search-box').val();
$.getJSON('/api/fwew-search', { 'query': tìpawm, 'language': getLanguage() })
$.getJSON('/api/fwew-search', { 'query': tìpawm, 'language': getLanguage(), 'dialect': getDialect() })
.done(function (tìeyng) {
const fromNaviResult = tìeyng['fromNa\'vi'];
const toNaviResult = tìeyng['toNa\'vi'];
Expand Down
2 changes: 1 addition & 1 deletion src/affixList.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ function addAffix(list, affixType, affixString, types) {
if (!affixString.length) {
return;
}
let affix = dictionary.getOfTypes(affixString, types);
let affix = dictionary.getOfTypes(affixString, types, 'FN');
if (affix.length > 0) {
list.push({
'type': affixType,
Expand Down
63 changes: 45 additions & 18 deletions src/dictionary.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ module.exports = {
}

const fs = require('fs');

const dialect = require('./dialect');
const output = require('./output');

try {
Expand All @@ -38,8 +40,8 @@ $ echo "{}" > data/words.json`);
process.exit(1);
}

// dictionary of all Na'vi words in the database
// this is a mapping from strings to (arrays of) IDs in `words`
// dictionaries of all Na'vi words in the database, one per dialect
// each dictionary is a mapping from strings to (arrays of) IDs in `words`
// used for searching
var searchables = {};

Expand All @@ -52,22 +54,47 @@ reload();

// Processes the dictionary data.
function reload() {
searchables = {};
searchables = {
'FN': {},
'RN': {},
'combined': {}
};

for (let i = 0; i < words.length; i++) {
let word = words[i];

// dialect forms of the word
word['word'] = {
'combined': word["na'vi"],
'FN': dialect.combinedToFN(word["na'vi"]),
'RN': dialect.combinedToRN(word["na'vi"])
};
word['word_raw'] = {
'combined': dialect.makeRaw(word['word']['combined']),
'FN': dialect.makeRaw(word['word']['FN']),
'RN': dialect.makeRaw(word['word']['RN'])
};

word["na'vi"] = word['word_raw']['FN']; // for compatibility reasons

// put the word in the searchables dictionary
let searchable = word["na'vi"].toLowerCase()
.replace(/[-\[\]]/g, '').replaceAll('/', '').replaceAll('ù', 'u'); // TODO replace by word_raw
if (!searchables.hasOwnProperty(searchable)) {
searchables[searchable] = [];
for (let dialect of ['FN', 'RN']) {
let searchable = word['word_raw'][dialect];
if (!searchables[dialect].hasOwnProperty(searchable)) {
searchables[dialect][searchable] = [];
}
searchables[dialect][searchable].push(i);
if (!searchables['combined'].hasOwnProperty(searchable)) {
searchables['combined'][searchable] = [];
}
if (!searchables['combined'][searchable].includes(i)) {
searchables['combined'][searchable].push(i);
}
}
searchables[searchable].push(i);

// put the word in the wordTypeKeys dictionary
let wordTypeKey = word['type'];
if (wordTypeKey.hasOwnProperty(searchable)) {
let wordTypeKey = word['word_raw']['FN'] + ':' + word['type'];
if (wordTypeKeys.hasOwnProperty(wordTypeKey)) {
output.warning('Duplicate word/type ' + wordTypeKey + ' in words.json');
output.hint(`Reykunyu assumes there cannot be two identical words with the same word
type (as word/type combinations are used to refer to words). Because
Expand All @@ -83,9 +110,9 @@ function getById(id) {
}

// Returns the given word of the given type.
function get(word, type) {
if (searchables.hasOwnProperty(word)) {
for (let id of searchables[word]) {
function get(word, type, dialect) {
if (searchables[dialect].hasOwnProperty(word)) {
for (let id of searchables[dialect][word]) {
let result = words[id];
if (result['type'] === type) {
return result;
Expand All @@ -97,10 +124,10 @@ function get(word, type) {

// Returns the given word of one of the given types. This returns an array
// because more than one type may match.
function getOfTypes(word, types) {
function getOfTypes(word, types, dialect) {
let results = [];
for (let type of types) {
let result = get(word, type);
let result = get(word, type, dialect);
if (result) {
results.push(result);
}
Expand All @@ -110,10 +137,10 @@ function getOfTypes(word, types) {

// Returns the given word that is not one of the given types. This returns an
// array because more than one type may match.
function getNotOfTypes(word, types) {
function getNotOfTypes(word, types, dialect) {
let results = [];
if (searchables.hasOwnProperty(word)) {
for (let id of searchables[word]) {
if (searchables[dialect].hasOwnProperty(word)) {
for (let id of searchables[dialect][word]) {
let result = words[id];
if (!types.includes(result['type'])) {
results.push(JSON.parse(JSON.stringify(result)));
Expand Down
35 changes: 20 additions & 15 deletions src/preprocess.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,32 @@ module.exports = {

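// normalizes a query by replacing Unicode tìftang (apostrophe) variants by
// normal ASCII ', removing soft hyphens, and rewriting alternative spellings:
// sh -> sy, ch -> tsy, and (unless the dialect is RN) b -> px, d -> tx,
// g (when not preceded by n) -> kx, ù -> u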
function preprocessQuery(query) {
function preprocessQuery(query, dialect) {
query = query.trim();
query = query.replace(/’/g, "'");
query = query.replace(/‘/g, "'");
query = query.replaceAll("\u{AD}", "");

query = query.replace(/sh/g, "sy");
query = query.replace(/Sh/g, "Sy");
query = query.replace(/ch/g, "tsy");
query = query.replace(/Ch/g, "Tsy");
query = query.replace(/b/g, "px");
query = query.replace(/B/g, "Px");
query = query.replace(/d/g, "tx");
query = query.replace(/D/g, "Tx");
query = query.replace(/-g/g, "kx");
query = query.replace(/-G/g, "Kx");
query = query.replace(/·g/g, "kx");
query = query.replace(/·G/g, "Kx");
query = query.replace(/(?<![Nn])g/g, "kx");
query = query.replace(/(?<![Nn])G/g, "Kx");
query = query.replace(/·/g, "");
query = query.replaceAll("\u{AD}", "");
query = query.replace(/ù/g, "u");
query = query.replace(/Ù/g, "U");

if (dialect !== 'RN') {
query = query.replace(/b/g, "px");
query = query.replace(/B/g, "Px");
query = query.replace(/d/g, "tx");
query = query.replace(/D/g, "Tx");
query = query.replace(/-g/g, "kx");
query = query.replace(/-G/g, "Kx");
query = query.replace(/·g/g, "kx");
query = query.replace(/·G/g, "Kx");
query = query.replace(/(?<![Nn])g/g, "kx");
query = query.replace(/(?<![Nn])G/g, "Kx");
query = query.replace(/·/g, "");
query = query.replace(/ù/g, "u");
query = query.replace(/Ù/g, "U");
}

return query;
}
Loading

0 comments on commit f1414b8

Please sign in to comment.