-
Notifications
You must be signed in to change notification settings - Fork 0
/
h2o_byor_FPGA.py
121 lines (100 loc) · 4.24 KB
/
h2o_byor_FPGA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""Xelera Random Forest (RandomForest) model"""
import datatable as dt
import numpy as np
from h2oaicore.models import CustomModel
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from h2oaicore.systemutils import physical_cores_count
import sys
import os
#make Xelera library accessible to python backend
sys.path.append("/app")
os.environ["H2O_XELERA"] = "H2O"
import XlRFInference
import os
from scipy import stats
import os
import time;
import sys
import getpass
import ctypes as C
class RandomForestModel(CustomModel):
    """Random Forest custom model for H2O Driverless AI.

    Training runs on CPU via sklearn's RandomForestClassifier/Regressor;
    inference is offloaded to an FPGA through the Xelera XlRFInference
    library (loaded from /app, see module-level sys.path setup).
    """
    _regression = True
    _binary = True
    _multiclass = True
    _display_name = "Xelera RF FPGA Inference"
    _description = "Random Forest Model: training with cpu-based (sklearn), inference with FPGA-based (Xelera)"

    def set_default_params(self, accuracy=None, time_tolerance=None,
                           interpretability=None, **kwargs):
        """Set baseline sklearn RF hyperparameters.

        n_estimators is capped at 1000; the split criterion is chosen by
        task type ("gini" for classification, "mse" for regression).
        """
        self.params = dict(random_state=kwargs.get("random_state", 1234),
                           n_estimators=min(kwargs.get("n_estimators", 100), 1000),
                           criterion="gini" if self.num_classes >= 2 else "mse",
                           n_jobs=self.params_base.get('n_jobs', max(1, physical_cores_count)))

    def mutate_params(self, accuracy=10, **kwargs):
        """Randomly mutate n_estimators and criterion for tuning.

        Higher accuracy settings permit larger tree counts; the criterion
        candidates depend on classification vs. regression.
        """
        if accuracy > 8:
            estimators_list = [100, 200, 300, 500, 1000, 10000]
        elif accuracy >= 5:
            estimators_list = [50, 100, 200, 300, 400, 500]
        else:
            estimators_list = [10, 50, 100, 150, 200, 250, 300]
        # Modify certain parameters for tuning
        self.params["n_estimators"] = int(np.random.choice(estimators_list))
        self.params["criterion"] = np.random.choice(["gini", "entropy"]) if self.num_classes >= 2 \
            else np.random.choice(["mse", "mae"])

    def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
        """Fit the sklearn RF on CPU, then pre-format its trees for the FPGA.

        Missing values are imputed with (column minimum - 1), recorded in
        self.min so predict() can apply the identical imputation. The
        Xelera-formatted tree parameters are stored alongside the sklearn
        model so predict() does not have to re-derive them.
        """
        orig_cols = list(X.names)
        if self.num_classes >= 2:
            lb = LabelEncoder()
            lb.fit(self.labels)
            y = lb.transform(y)
            model = RandomForestClassifier(**self.params)
        else:
            model = RandomForestRegressor(**self.params)
        # Replace missing values with a value smaller than all observed values
        self.min = dict()
        for col in X.names:
            XX = X[:, col]
            self.min[col] = XX.min1()
            if self.min[col] is None or np.isnan(self.min[col]):
                # All-missing column: fall back to a large sentinel
                self.min[col] = -1e10
            else:
                self.min[col] -= 1
            XX.replace(None, self.min[col])
            X[:, col] = XX
            assert X[dt.isna(dt.f[col]), col].nrows == 0
        X = X.to_numpy()
        model.fit(X, y)
        importances = np.array(model.feature_importances_)
        # Prepare trees right after fitting and store back the formatted
        # trees persistently together with the sklearn model
        xlrfsetup = XlRFInference.XlRFSetup()
        xlrfsetup.setTrees(model)
        params = xlrfsetup.getModelParameters()
        modelTuple = (model, params)
        del xlrfsetup
        self.set_model_properties(model=modelTuple,
                                  features=orig_cols,
                                  importances=importances.tolist(),
                                  iterations=self.params['n_estimators'])

    def predict(self, X, **kwargs):
        """Run FPGA-accelerated prediction via Xelera.

        Applies the same missing-value imputation as fit() (using
        self.min), reloads the pre-formatted tree parameters into the
        Xelera runtime, and dispatches prediction to the FPGA bitstream.
        """
        X = dt.Frame(X)
        for col in X.names:
            XX = X[:, col]
            XX.replace(None, self.min[col])
            X[:, col] = XX
        modelTuple, _, _, _ = self.get_model_properties()
        X = X.to_numpy()
        # Unpack (sklearn model, Xelera-formatted tree parameters)
        model = modelTuple[0]
        params = modelTuple[1]
        # Re-feed the rf library with the stored parameters
        xlrf = XlRFInference.XlRFInference('/app/Xl_rf_inference.xclbin')
        xlrf.setModelParameters(params)
        # FPGA configuration
        print('[XELERA] Number of trees: ' + str(len(model.estimators_)) + ", number of samples: ", X.shape)
        # FPGA predict
        # BUG FIX: time.clock() was deprecated in Python 3.3 and removed in
        # Python 3.8; perf_counter() is the supported wall-clock timer.
        start_time = time.perf_counter()
        HW_preds = xlrf.predict(X, len(model.estimators_))
        end_time = time.perf_counter()
        print("[XELERA] HW FPGA predict time is", end_time - start_time, "seconds")
        return HW_preds