-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
56 lines (44 loc) · 1.68 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python3
"""AdaBoost binary classification from scratch in Python"""
__author__ = "João Francisco B. S. Martins"
__email__ = "[email protected]"
__license__ = "GPL"
__version__ = "3.0"
import sys
import numpy as np
import adaboost as ab
import data_handler as dh
def main(k=5, rounds=301, dataset_path="tic-tac-toe.data"):
    """Run k-fold cross-validation of AdaBoost and save the averaged results.

    Every fold is used once as the testing set while all other folds are
    merged into the training set. The three result series returned by
    ``AdaBoost.boost`` for each fold are averaged element-wise over the
    folds and passed to ``dh.save_results``.

    Args:
        k: Number of folds. Must be at least 2, otherwise no training data
            would remain once the testing fold is held out.
        rounds: Argument forwarded to ``AdaBoost.boost`` (presumably the
            number of boosting iterations -- confirm against ``adaboost``).
        dataset_path: Path forwarded to ``dh.load_dataset``.

    Raises:
        ValueError: If ``k`` is smaller than 2.
    """
    if k < 2:
        raise ValueError("k-fold cross-validation requires k >= 2")

    # Dataset retrieving and formatting
    dataset = dh.load_dataset(dataset_path)
    dataset = dh.format_outputs(dataset)
    folds = dh.fold_dataset(dataset, k)

    cv_accuracies = []
    cv_errors = []
    cv_model_errors = []

    # Execute k-fold cross-validation
    for i in range(k):
        print("Round", i + 1)

        testing_set = dh.separate_attributes(folds[i])

        # Merge every fold except the held-out one. A plain selection is used
        # instead of np.delete(folds, i): without an explicit axis np.delete
        # flattens equally sized folds down to scalars, and ragged folds
        # cannot be converted to an array at all on recent NumPy versions.
        remaining_folds = np.concatenate(
            [fold for j, fold in enumerate(folds) if j != i]
        )
        training_set = dh.separate_attributes(remaining_folds)

        ada = ab.AdaBoost(training_set, testing_set)
        results = ada.boost(rounds)

        cv_accuracies.append(results[0])
        cv_errors.append(results[1])
        cv_model_errors.append(results[2])

    # Average each series element-wise across the k folds
    mean_accuracies = np.mean(cv_accuracies, axis=0)
    mean_errors = np.mean(cv_errors, axis=0)
    mean_model_errors = np.mean(cv_model_errors, axis=0)

    # Save the results (to CSV, per the original comment on save_results)
    dh.save_results(mean_accuracies, "boosting_accuracy")
    dh.save_results(mean_errors, "boosting_error")
    dh.save_results(mean_model_errors, "model_error")
# Entry point: run the experiment only when this file is executed as a
# script, so importing the module has no side effects.
if __name__ == "__main__":
    main()