forked from henryzord/eel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
147 lines (125 loc) · 4.88 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""
Runs a single instance of EEL.
If an exception occurs during this execution, the program is meant to write the exception message to a file in the
output_path directory. NOTE: that handler is currently commented out in preliminaries(), so exceptions propagate.
Command line parameters:
* -d: a full path (including file type, e.g. dataset.arff) to the dataset to be run.
* -m: path to output metadata regarding the evolutionary process.
* -p: path to the parameters file.
* --n_fold: index of the fold to run in this execution. Must be smaller than the value in the params file.
* --n_run: index of the current run. Note: it is not the total number of runs!
"""
import argparse
import json
import numpy as np
from eda import Ensemble
from eda.integration import integrate
from reporter import EDAReporter
from utils import __get_fold__, get_dataset_name
from data_normalization import DataNormalizer
def eelem(dataset_path, output_path, params_path, n_fold, n_run, verbose=True):
    """
    Runs a single instance of EEL.

    Reads the JSON parameters file, fetches the requested fold through ``__get_fold__``, builds a reporter and an
    initial ensemble through ``Ensemble.from_adaboost``, and passes both to ``integrate``.

    The parameters file must provide the keys ``n_base_classifiers``, ``n_individuals`` and ``n_generations``
    (these are the ones read here; ``__get_fold__`` receives the whole dictionary and may read others).

    :type dataset_path: str
    :param dataset_path: a full path (including file type, e.g. dataset.arff) to the dataset to be run.
    :type output_path: str
    :param output_path: path to output metadata regarding the evolutionary process.
    :type params_path: str
    :param params_path: path to the parameters file (JSON).
    :type n_fold: int
    :param n_fold: index of the fold to run in this execution. Must be smaller than the value in the params file.
    :type n_run: int
    :param n_run: index of the current run.
    :type verbose: bool
    :param verbose: whether to output metadata to console. Defaults to True.
    :return: the object returned by ``integrate`` for the initial ensemble.
    """
    # Use a context manager so the parameters file handle is closed as soon as it has been parsed,
    # instead of being left for the garbage collector.
    with open(params_path) as params_file:
        params = json.load(params_file)

    dataset_name = get_dataset_name(dataset_path)

    X_train, X_test, y_train, y_test = __get_fold__(params=params, dataset_path=dataset_path, n_fold=n_fold)

    # Number of distinct labels observed in the training portion of the fold.
    n_classes = len(np.unique(y_train))

    reporter = EDAReporter(
        Xs=[X_train, X_test],
        ys=[y_train, y_test],
        set_names=['train', 'test'],
        output_path=output_path,
        dataset_name=dataset_name,
        n_fold=n_fold,
        n_run=n_run,
        n_classifiers=params['n_base_classifiers'],
        n_classes=n_classes,
    )

    ensemble = Ensemble.from_adaboost(
        X_train=X_train, y_train=y_train,
        data_normalizer_class=DataNormalizer,
        n_classifiers=params['n_base_classifiers'],
    )  # type: Ensemble

    ensemble = integrate(
        ensemble=ensemble,
        n_individuals=params['n_individuals'],
        n_generations=params['n_generations'],
        reporter=reporter,
        verbose=verbose
    )
    return ensemble
def preliminaries(dataset_path, output_path, params_path, n_fold, n_run):
    """
    Runs a single instance of EEL by delegating to ``eelem``.

    NOTE: writing the exception message to a file inside ``output_path`` is currently disabled (see the TODO
    below), so any exception raised by ``eelem`` propagates to the caller.

    :type dataset_path: str
    :param dataset_path: a full path (including file type, e.g. dataset.arff) to the dataset to be run.
    :type output_path: str
    :param output_path: path to output metadata regarding the evolutionary process.
    :type params_path: str
    :param params_path: path to the parameters file.
    :type n_fold: int
    :param n_fold: index of the fold to run in this execution. Must be smaller than the value in the params file.
    :type n_run: int
    :param n_run: index of the current run.
    """
    # try:
    eelem(
        dataset_path=dataset_path,
        output_path=output_path,
        params_path=params_path,
        n_fold=n_fold,
        n_run=n_run
    )
    # TODO reactivate later
    # The dataset name is only needed by this disabled handler, so it is resolved inside it
    # rather than being computed (and left unused) on every call.
    # except Exception as e:
    #     dataset_name = get_dataset_name(dataset_path)
    #     name = EDAReporter.get_output_file_name(
    #         output_path=output_path,
    #         dataset_name=dataset_name,
    #         n_fold=n_fold, n_run=n_run,
    #         reason='exception'
    #     )
    #
    #     with open(name, 'w') as f:
    #         f.write(str(e) + '\n' + str(e.args))
if __name__ == '__main__':
    # Command-line entry point: parse the five required options and run one EEL instance.
    cli = argparse.ArgumentParser(
        description='Main script for running Estimation of Distribution Algorithms for ensemble learning.'
    )

    # Required string-valued options, as (flag, help text) pairs.
    # NOTE(review): the '-d' help says "datasets folder", while the function docstrings in this file describe
    # a full path to a single .arff file -- confirm which one is intended before changing the text.
    path_options = (
        ('-d', 'Path to datasets folder. Datasets must be in .arff format.'),
        ('-m', 'Path to metadata folder. The folder must be pre-existent, even if empty.'),
        ('-p', 'Path to EEL\'s .json parameter file.'),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, action='store', required=True, help=help_text)

    # Required integer-valued options, as (flag, help text) pairs.
    index_options = (
        ('--n_fold', 'Index of the fold currently being tested.'),
        ('--n_run', 'Index of the run currently being tested.'),
    )
    for flag, help_text in index_options:
        cli.add_argument(flag, action='store', required=True, type=int, help=help_text)

    parsed = cli.parse_args()

    preliminaries(
        dataset_path=parsed.d,
        output_path=parsed.m,
        params_path=parsed.p,
        n_fold=parsed.n_fold,
        n_run=parsed.n_run,
    )