-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cpp
115 lines (79 loc) · 3.06 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <map>
#include <regex>
#include <string>
#include <vector>
#include "StringDict.h"
#include "JsonImporter.h"
#include "FeatureStore.h"
#include "FeatureIndex.h"
#include "QueryProcessor.h"
using namespace std;
#define MAX_FEATURE_READ 10000
int main() {
JsonImporter ji("/Users/matt/git/devsearch-lookup/bucket1.json");
StringDict* file_sd = new StringDict();
StringDict* feature_sd = new StringDict();
FeatureStore* db = new FeatureStore(MAX_FEATURE_READ);
string feature, file;
int lineNb = 0;
float reporank = 0.0;
const clock_t begin_import_time = clock();
int count = 0;
while(ji.readFeature(&feature, &file, &reporank, &lineNb) && count < MAX_FEATURE_READ) {
int file_id = file_sd->add(file);
int feature_id = feature_sd->add(feature);
count++;
db->add(file_id, feature_id, reporank, lineNb);
if (count%10000 == 0) {
printf("processed %dK\n", count/1000);
fflush(stdout);
}
}
printf("DB imported in %f sec\n", float( clock () - begin_import_time ) / CLOCKS_PER_SEC);
printf("Feature processed: %d\n", count);
printf("File Count: %d\n", file_sd->size);
printf("File Node Count: %d\n", file_sd->nodeCount());
printf("Feature Count: %d\n", feature_sd->size);
printf("Feature Node Count: %d\n", feature_sd->nodeCount());
//not actually needed, build index will do it
// db->sort();
FeatureIndex* db_index = new FeatureIndex(feature_sd->size, count);
db->buildFeatureIndex(db_index);
const clock_t begin_sort_time = clock();
db->sort();
printf("DB sorted in %f sec\n", float( clock () - begin_sort_time ) / CLOCKS_PER_SEC);
// db->print();
for (int i = 0; i < db->size-1; ++i) {
if(db->at(i)->feature_id > db->at(i+1)->feature_id) {
printf("db has problem\n");
exit(1);
}
}
int qLength = 3;
string query[] = {
"controlStatement=if",
"import=java.util.List",
"controlStatement=for"
};
int keys[qLength];
feature_sd->getBatchKeys(qLength, query, keys);
QueryProcessor* q = new QueryProcessor(db, db_index);
const clock_t begin_query_time = clock();
int take = 10;
int from = 0;
vector<agg_result> finalResult;
int totalMachedFiles;
int totalMachedFeatures;
q->process(qLength, keys, 10, 0, &finalResult, &totalMachedFeatures, &totalMachedFiles);
printf("DB queried in %f sec\n", float( clock () - begin_query_time ) / CLOCKS_PER_SEC);
printf("Found %d features in %d files\n\n--- Best Results ---\n", totalMachedFeatures, totalMachedFiles);
for (vector<agg_result>::iterator it = finalResult.begin(); it != finalResult.end(); it++) {
printf("score:%f\tfileId:%d\t|", (*it).score, (*it).fileId);
for (map<string, double>::iterator mapIt = (*it).scoreBreakdown.begin(); mapIt != (*it).scoreBreakdown.end(); mapIt++) {
printf("\t%s=%f", mapIt->first.c_str(), mapIt->second);
}
cout << endl;
}
return 0;
}