-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsendWords.js
108 lines (85 loc) · 2.97 KB
/
sendWords.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/*
Responsible for the very first step: reading word pairs from a CSV or JSON
file and sending them to the RabbitMQ queue "word-pairs".
*/
const fs = require('fs');
const { promisify } = require('util');
const [readFile, writeFile] = [fs.readFile, fs.writeFile].map(promisify);
const publisherClass = require('./class/Publisher');
// Shared publisher instance used by init() and the main script.
const Publisher = new publisherClass();
// Name of the queue that every word pair is sent to.
const wordPairQueueName = 'word-pairs';
const allowedFileExtensions = ['csv', 'json'];

// Path of the word file, taken from the first command-line argument.
const filePath = process.argv[2];
// Validate the path BEFORE deriving anything from it, so a missing argument
// produces this message rather than a TypeError on `undefined`.
if (!filePath) throw new Error('must specify file path to find csv or json of words');

// The extension is whatever follows the LAST dot, so paths such as
// "./words.csv" or "data.v2/words.csv" are recognised. A path without any dot
// has no extension. Lower-casing lets "WORDS.CSV" through as well.
const lastDotIndex = filePath.lastIndexOf('.');
const fileExtension = lastDotIndex === -1
  ? ''
  : filePath.slice(lastDotIndex + 1).toLowerCase();
if (!allowedFileExtensions.includes(fileExtension)) throw new Error('file must be either csv or json');
/**
 * Reads the word file and parses it into a list of word pairs.
 *
 * Uses the module-level `filePath` and `fileExtension`: a `csv` extension
 * selects the CSV parser, anything else is parsed as JSON.
 *
 * @returns {Promise<Array<object>>} the parsed word pairs
 * @throws {Error} "error fetching words: ..." when the file cannot be read or
 *   parsed; the underlying error is attached as `cause` where supported
 */
async function fetchWords () {
  try {
    const file = await readFile(filePath, { encoding: 'utf8' });
    return fileExtension === 'csv' ? parseCsv(file) : parseJson(file);
  }
  catch (err) {
    // keep the original error (and its stack) reachable through `cause`
    throw new Error(`error fetching words: ${err}`, { cause: err });
  }
}

/**
 * Turns the raw text of a CSV word file into word-pair objects.
 *
 * @param {string} rawFileString - full contents of the CSV file
 * @returns {Array<{english: string, spanish: string, spanishPronunciationWord: string}>}
 */
function parseCsv (rawFileString) {
  return rawFileString
    .split('\n')
    .filter(isCsvDataLine)
    .map(line => parseCsvLine(line));
}

/**
 * Parses one "english, spanish" CSV line.
 *
 * Sometimes a translation has multiple spanish words as meanings, but there
 * can only be one for pronunciation; the csv syntax uses ">>" to denote it,
 * for example: mustache, el bigote / mostacho >> mostacho
 *
 * Without ">>", a fragment wrapped in "<...>" is taken whole as the
 * pronunciation word (example: <cuerpo humano>); otherwise the last word of
 * the spanish field is used, like "nino" from "el nino".
 *
 * @param {string} line - a non-empty, non-comment CSV line
 * @returns {{english: string, spanish: string, spanishPronunciationWord: string}}
 * @throws {Error} when the line has no comma-separated spanish field
 */
function parseCsvLine (line) {
  const [english, rawSpanish] = line.split(',').map(str => str.trim());
  if (rawSpanish === undefined) {
    throw new Error(`malformed csv line (expected "english, spanish"): ${line.trim()}`);
  }

  let spanish = rawSpanish;
  let spanishPronunciationWord;
  if (line.includes('>>')) {
    spanishPronunciationWord = line.split('>>')[1];
    spanish = rawSpanish.split('>>')[0].trim();
  }
  else if (rawSpanish.includes('<') && rawSpanish.includes('>')) {
    spanishPronunciationWord = rawSpanish.split('<')[1].split('>')[0];
  }
  else {
    const spanishWords = rawSpanish.split(' ');
    spanishPronunciationWord = spanishWords[spanishWords.length - 1];
  }

  return {
    english,
    // the "<" / ">" markers are only syntax, not part of the translation
    spanish: spanish.replace(/[<>]/g, ''),
    spanishPronunciationWord: spanishPronunciationWord.trim()
  };
}

/**
 * Tells whether a CSV line carries data, i.e. it is neither empty nor a
 * comment starting with "//".
 *
 * @param {string} line
 * @returns {boolean}
 */
function isCsvDataLine (line) {
  const trimmed = line.trim();
  return trimmed !== '' && !trimmed.startsWith('//');
}

/**
 * Parses the raw text of a JSON word file.
 *
 * @param {string} rawFileString - full contents of the JSON file
 * @returns {Array<object>} the parsed word pairs, returned as-is
 * @throws {SyntaxError} when the text is not valid JSON
 * @throws {Error} when the JSON value is not an array
 */
function parseJson (rawFileString) {
  const words = JSON.parse(rawFileString);
  if (!Array.isArray(words)) {
    throw new Error('json word file must contain an array of word pairs');
  }
  return words;
}
/**
 * Runs the start-up work that must finish before any word is published.
 * At present this is only the shared Publisher's own `init()`.
 *
 * @returns {Promise<void>} settles once `Publisher.init()` has settled
 */
async function init () {
  const publisherReady = Publisher.init();
  await publisherReady;
}
// MAIN SCRIPT
// Initialises the publisher, reads the word pairs, sends each one to the
// "word-pairs" queue and closes the connection again - also when a step fails.
(async () => {
  // set once init() has finished, so cleanup only closes what was opened
  let isInitialised = false;
  try {
    // establish connections to RMQs, etc
    await init();
    isInitialised = true;

    const pairs = await fetchWords();
    console.log(pairs);

    // sendToQueue's implementation is not visible here: it may return a plain
    // value or a promise. Promise.all handles both, so the connection is not
    // closed while a send is still pending, and a rejected send lands in the
    // catch below.
    await Promise.all(
      pairs.map(pair => Publisher.sendToQueue(wordPairQueueName, pair))
    );
    console.log(`${pairs.length} words sent to queue!`);
  }
  catch (err) {
    console.log('error sending words to queue', err);
  }
  finally {
    // Close on the failure path too; previously an error after init() left
    // the connection open.
    if (isInitialised) {
      try {
        await Publisher.closeConnection();
      }
      catch (err) {
        console.log('error closing connection', err);
      }
    }
  }
})();