-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.cpp
53 lines (41 loc) · 1.64 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#include <iostream>
#include "spellchecker.h"
#include "tweetscleaner.h"
#include <fstream>
#include <boost/concept_check.hpp>
using namespace casimiro;
/**
 * Reads a dictionary file into a set, one entry per line.
 *
 * Every line produced by std::getline is inserted unchanged (empty lines
 * included).
 *
 * @param _fileName Path of the dictionary file to read.
 * @return The entries read from the file. If the file cannot be opened the
 *         result is empty and a warning is written to std::cerr.
 */
StringUnorderedSet LoadDictFromFile(const std::string& _fileName)
{
    StringUnorderedSet dict;

    std::ifstream file(_fileName);
    if(!file.is_open())
    {
        // Still return an empty dictionary so existing callers keep working,
        // but report the failure: a silently empty dictionary is
        // indistinguishable from a mistyped path.
        std::cerr << "LoadDictFromFile: could not open '" << _fileName << "'\n";
        return dict;
    }

    std::string line;
    while(std::getline(file, line))
        dict.insert(line);

    return dict;
}
/**
 * Runs TweetsCleaner::cleanTweetsGroupingByUser on the sorted tweet dump with
 * a fixed set of options.
 *
 * @param speller      Unused here; kept so the signature stays stable.
 * @param foreignDicts Unused here; kept so the signature stays stable.
 * @param DelafDict    Unused here; kept so the signature stays stable.
 * @param cleaner      Cleaner that performs the actual work.
 */
void cleanTweetsGroupingByUser(SpellChecker& speller, StringUnorderedSets& foreignDicts, DelafDict& DelafDict, TweetsCleaner& cleaner)
{
    // Only `cleaner` is needed; discard the rest explicitly.
    (void)speller;
    (void)foreignDicts;
    (void)DelafDict;

    // Options forwarded to the cleaner. They stay as named, mutable locals
    // because the callee's parameter types are not visible from this file.
    double unknownRateLimit = 0.3;
    int chosenWordsFloor = 30;
    bool useSpelling = false;

    cleaner.cleanTweetsGroupingByUser(
        "/home/casimiro/tweet_sorted_dump",
        "data/tweets_cleaned_dump_grouped_by_user",
        useSpelling,
        chosenWordsFloor,
        unknownRateLimit);
}
/**
 * Runs TweetsCleaner::cleanTweets on the sorted tweet dump with a fixed set
 * of options.
 *
 * @param speller      Unused here; kept so the signature stays stable.
 * @param foreignDicts Unused here; kept so the signature stays stable.
 * @param DelafDict    Unused here; kept so the signature stays stable.
 * @param cleaner      Cleaner that performs the actual work.
 */
void cleanTweets(SpellChecker& speller, StringUnorderedSets& foreignDicts, DelafDict& DelafDict, TweetsCleaner& cleaner)
{
    // Only `cleaner` is needed; discard the rest explicitly.
    (void)speller;
    (void)foreignDicts;
    (void)DelafDict;

    // Options forwarded to the cleaner. They stay as named, mutable locals
    // because the callee's parameter types are not visible from this file.
    double unknownRateLimit = 0.5;
    int chosenWordsFloor = 4;
    bool useSpelling = false;

    // NOTE(review): the output path is the same one cleanTweetsGroupingByUser
    // writes to, even though this variant does not group by user. Possibly a
    // copy-paste leftover -- confirm the intended destination.
    cleaner.cleanTweets(
        "/home/casimiro/tweet_sorted_dump",
        "data/tweets_cleaned_dump_grouped_by_user",
        useSpelling,
        chosenWordsFloor,
        unknownRateLimit);
}
int main(int argc, char **argv) {
SpellChecker speller;
speller.prepare();
StringUnorderedSets foreignDicts;
//foreignDicts.push_back(LoadDictFromFile("data/en.dict"));
//foreignDicts.push_back(LoadDictFromFile("data/es.dict"));
DelafDict delafDict;
delafDict.loadFromFile("data/delaf.dict");
auto cleaner = TweetsCleaner(delafDict, foreignDicts, speller);
cleanTweetsGroupingByUser(speller, foreignDicts, delafDict, cleaner);
return 0;
}