ai_multiclass.py
import sys
import pickle
from itertools import cycle

import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # headless backend; select it before pyplot is imported
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sn
from scipy.io import arff
from mpi4py import MPI
from sklearn import metrics, preprocessing
from sklearn.preprocessing import label_binarize
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import auc, confusion_matrix, roc_curve
from sklearn.model_selection import StratifiedKFold, cross_val_predict, cross_validate
# MPI setup: each rank trains and evaluates one of the four classifiers
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

methods = ["RandomForest", "SVM", "MLP", "NaiveBayes"]
methods = methods[:size]  # one method per MPI rank; launch with up to 4 ranks

# training: per-dataset cross-validation metrics accumulated by each rank
all_acc = []
all_f1micro = []
all_f1macro = []
all_f1w = []
all_precisionmicro = []
all_precisionmacro = []
all_precisionw = []
all_recallmicro = []
all_recallmacro = []
all_recallw = []
all_precision = []
all_recall = []
conf_matrix_list_of_arrays = []
yall = np.array([])
yall_score = []
for i in range(1, 16):
    print("\nDataset nr %d, %s algorithm\n" % (i, methods[rank]))
    sys.stdout.flush()

    # load the ARFF file for dataset i
    X = arff.loadarff(r"C:\Users\bhadr\Desktop\multiclass\data%d Sampled Scenarios.csv.arff" % i)
    X = pd.DataFrame(X[0], dtype='float64')
    #X = pd.read_csv("Data/data%d.csv" % i)  # alternative: read the CSV directly
    X = X.replace(np.inf, np.finfo(np.float32).max)  # replace 'inf' with the largest float32 value

    # preparing the label converter
    # le = preprocessing.LabelEncoder()
    # le.fit(labels)
    # assigning the training data and the labels into variables
    # y = le.transform(X['marker'])
    y = X['marker'].values
    X = X.drop(columns='marker').values

    # one classifier per MPI rank, in the same order as `methods`
    clf = []
    clf.append(RandomForestClassifier(n_estimators=100, max_features='log2'))  # RandomForest
    clf.append(SVC(probability=True, max_iter=1000, cache_size=7000))  # SVM
    clf.append(MLPClassifier(hidden_layer_sizes=(20,), max_iter=1000, early_stopping=True))  # MLP
    clf.append(GaussianNB())  # NaiveBayes

    # 10-fold stratified cross-validation with a full set of multiclass metrics
    cv = StratifiedKFold(n_splits=10)
    scores = cross_validate(clf[rank], X, y, cv=cv,
                            scoring=['accuracy', 'f1_micro', 'f1_macro', 'f1_weighted',
                                     'precision_micro', 'precision_macro', 'precision_weighted',
                                     'recall_micro', 'recall_macro', 'recall_weighted'],
                            n_jobs=2)
    #GridSearchCV
    y_pred = cross_val_predict(clf[rank], X, y, cv=cv, n_jobs=3)

    all_acc.append(np.average(scores['test_accuracy']))
    all_f1micro.append(np.average(scores["test_f1_micro"]))
    all_f1macro.append(np.average(scores["test_f1_macro"]))
    all_f1w.append(np.average(scores["test_f1_weighted"]))
    all_precisionmicro.append(np.average(scores["test_precision_micro"]))
    all_precisionmacro.append(np.average(scores["test_precision_macro"]))
    all_precisionw.append(np.average(scores["test_precision_weighted"]))
    all_recallmicro.append(np.average(scores["test_recall_micro"]))
    all_recallmacro.append(np.average(scores["test_recall_macro"]))
    all_recallw.append(np.average(scores["test_recall_weighted"]))

    print("\n%s algorithm done!\n" % (methods[rank]))
    sys.stdout.flush()
# collect results on rank 0: per-dataset accuracies plus metrics averaged over all datasets
comm.Barrier()
all_acc = comm.gather(all_acc)
all_f1micro = comm.gather(np.average(all_f1micro))
all_f1macro = comm.gather(np.average(all_f1macro))
all_f1w = comm.gather(np.average(all_f1w))
all_precisionmicro = comm.gather(np.average(all_precisionmicro))
all_precisionmacro = comm.gather(np.average(all_precisionmacro))
all_precisionw = comm.gather(np.average(all_precisionw))
all_recallmicro = comm.gather(np.average(all_recallmicro))
all_recallmacro = comm.gather(np.average(all_recallmacro))
all_recallw = comm.gather(np.average(all_recallw))

if rank == 0:
    with open('output_multiclass.pickle', 'wb') as results:
        pickle.dump([all_acc, all_f1micro, all_f1macro, all_f1w,
                     all_precisionmicro, all_precisionmacro, all_precisionw,
                     all_recallmicro, all_recallmacro, all_recallw], results)
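
# Usage sketch (an assumption, not part of the original workflow): the script is
# written to be launched with one MPI rank per classifier, e.g.
#   mpiexec -n 4 python ai_multiclass.py
# As a minimal example of consuming the output, rank 0 can reload the pickle it
# just wrote and print each method's mean accuracy across the 15 datasets. The
# local names below are illustrative; only the dump order above is relied upon.
if rank == 0:
    with open('output_multiclass.pickle', 'rb') as results:
        (acc_per_method, f1mi, f1ma, f1w,
         pmi, pma, pw,
         rmi, rma, rw) = pickle.load(results)
    # acc_per_method is one list of per-dataset mean CV accuracies per rank
    for method, acc_per_dataset in zip(methods, acc_per_method):
        print("%s: mean accuracy over datasets = %.4f" % (method, np.average(acc_per_dataset)))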