// Histogram.h
// Part of FixedHistogram class is from https://github.com/DELTA37/ParallelDecisionTree
#include <vector>
#include <functional>
#include <map>
#include <list>
#include <utility>
#include <numeric>
#include <cassert>
#include <iostream>
#include <queue>
#include "DecisionTree.h"
#define W 2
#define THRESHOLD 0.01
/* For a fixed amount of data, varying the number of bins does not change
the test accuracy much. Therefore, we usually set the number of bins
to 1/10 of the input data size. */
#define Bt 70
using namespace std;
/**
 * Histogram with a bounded number of bins, stored as an ordered map.
 *
 * Only the interface is declared here; the member functions are defined in
 * the implementation file. The operation names (update / merge / sum /
 * uniform) match the histogram procedures of Ben-Haim & Tom-Tov's streaming
 * parallel decision tree -- presumably the implementation follows them;
 * confirm against the .cpp.
 *
 * A default-constructed histogram has an empty `data` map and all scalar
 * members set to well-defined values (previously they were left
 * indeterminate, so reading them was undefined behaviour).
 */
class FixedHistogram {
public:
    /// Histogram contents, ordered by key. Presumably bin position -> count;
    /// confirm against the implementation. Deliberately public: the private
    /// copy that used to live below was commented out.
    std::map<double, int> data;

    /// Creates an empty histogram with every member initialised.
    FixedHistogram() = default;

    /// Builds a histogram from `points`.
    /// @param _B     presumably the maximum number of bins -- confirm in the .cpp
    /// @param points input values
    /// @param eps    tolerance, defaults to 1e-5 (exact use is defined in the .cpp)
    FixedHistogram(int _B, std::vector<double> const& points, double eps = 1e-5);

    /// Presumably estimates how many points are <= b -- confirm in the .cpp.
    double sum(double const& b);

    /// Adds point `p` with weight/count `c` (1 by default).
    void update(double const& p, int c = 1);

    /// Merges histogram `h` into this one.
    void merge(FixedHistogram const& h);

    /// Presumably returns candidate split points that divide the data into
    /// `_B` roughly equal parts -- confirm in the .cpp.
    std::vector<double> uniform(int _B);

private:
    /*
     * We assign B = Bt in the implementation.
     */
    int B = 0;

    // Same value as the default of the converting constructor's `eps`.
    double eps = 1e-5;

    // std::map<double, int> data;

    /*
     * Left and right edges of the histogram. They are expected to be computed
     * during execution; zero is only a defined starting value.
     */
    double min_point = 0.0;
    double max_point = 0.0;

    void _reduce(void);
};
struct HistNode {
Node *node;
vector<vector<double>> dataX[W];
vector<int> dataY[W];
};
Node *histTree(vector<vector<double>> x, vector<int> y);
FixedHistogram **compressData(vector<vector<double>> x, vector<int> y,
int uniqueY, int dimension);
vector<int> evalHistTree(Node *node, vector<vector<double>> &xTe);
rfOutput *randomForestHist(vector<vector<double>> x, vector<int> y, int k,
int nt);
vector<int> evalForestHist(rfOutput *forestOutput, int nt,
vector<vector<double>> &xTe);