-
Notifications
You must be signed in to change notification settings - Fork 34
/
evaluation.py
87 lines (72 loc) · 3.15 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import importlib
import os
import sys
import numpy as np
import python.metrics as mymetrics
from python.data import Dataloader as myDataloader
if __name__ == "__main__":
    # Evaluation script.
    #
    # Usage: python evaluation.py <path_to_data> <path_to_repo>
    #
    # 1. Fits models.TwoClustersMIP on the first 1000 samples of "dataset_4"
    #    and scores it on those same samples.
    # 2. Fits models.HeuristicModel on a shuffled 80% of "dataset_10" and
    #    scores it on the remaining 20%.
    # 3. Writes the four scores to <path_to_repo>/results.txt.
    print("Starting Python script for evaluation")

    # Fail with a readable usage message instead of an IndexError traceback
    # when one of the two required positional arguments is missing.
    if len(sys.argv) < 3:
        sys.exit("Usage: python evaluation.py <path_to_data> <path_to_repo>")

    path_to_data = sys.argv[1]
    print("Path to data is:", path_to_data)
    path_to_repo = sys.argv[2]
    print("Path to Repo is:", path_to_repo)

    # `models` is looked up in <path_to_repo>/python, so the import has to
    # happen after that directory is appended to the module search path.
    sys.path.append(os.path.join(path_to_repo, "python"))
    import models

    ### First part: test of the MIP model
    print("MIP Model - dataset_4:")
    data_length = 1000
    data_loader = myDataloader(os.path.join(path_to_data, "dataset_4"))  # Path to test dataset
    X, Y = data_loader.load(length=data_length)
    print(X.shape)

    # Seeds NumPy's global RNG. The train/test shuffle of the second part
    # draws from this same generator, so the seed also affects that split.
    np.random.seed(123)
    model = models.TwoClustersMIP(
        n_clusters=2,
        n_pieces=5,
    )  # You can add your model's arguments here, the best would be set up the right ones as default.
    model.fit(X, Y)
    print(model)

    # %Pairs Explained
    pairs_explained = mymetrics.PairsExplained()
    pe_m1 = pairs_explained.from_model(model, X, Y)
    print("####--------------------------------####")
    print("Percentage of explained preferences:", pe_m1)

    # %Cluster Intersection
    cluster_intersection = mymetrics.ClusterIntersection()
    Z = data_loader.get_ground_truth_labels(length=data_length)
    print("% of pairs well grouped together by the model:")
    ri_m1 = cluster_intersection.from_model(model, X, Y, Z)
    print("Cluster intersection for all samples:", ri_m1)
    print("####--------------------------------####")

    ### 2nd part: test of the heuristic model
    data_loader = myDataloader(os.path.join(path_to_data, "dataset_10"))  # Path to test dataset
    X, Y = data_loader.load()

    # Shuffle the sample positions 0..len(X)-1 in place, then cut once:
    # the first 80% are used for training, the rest for testing.
    indexes = np.arange(len(X))
    np.random.shuffle(indexes)
    n_train = int(len(indexes) * 0.8)
    train_indexes = indexes[:n_train]
    test_indexes = indexes[n_train:]

    X_train = X[train_indexes]
    Y_train = Y[train_indexes]
    model = models.HeuristicModel()
    model.fit(X_train, Y_train)

    X_test = X[test_indexes]
    Y_test = Y[test_indexes]
    Z_test = data_loader.get_ground_truth_labels()[test_indexes]

    # Validation on test set
    # Each metric is evaluated exactly once, so the value printed below is
    # the very same value that ends up in results.txt.
    # %Pairs Explained
    print("####--------------------------------####")
    pairs_explained = mymetrics.PairsExplained()
    pe_m2 = pairs_explained.from_model(model, X_test, Y_test)
    print("Percentage of explained preferences:", pe_m2)

    # %Cluster Intersection
    cluster_intersection = mymetrics.ClusterIntersection()
    print("% of pairs well grouped together by the model:")
    ri_m2 = cluster_intersection.from_model(model, X_test, Y_test, Z_test)
    print("Cluster intersection for all samples:", ri_m2)
    print("####--------------------------------####")

    # Persist the four scores; the file is overwritten on every run.
    results_path = os.path.join(path_to_repo, "results.txt")
    with open(results_path, "w", encoding="utf-8") as file:
        file.write(f"Model 1 Pairs explained: {pe_m1}\n")
        file.write(f"Model 1 RandIndex: {ri_m1}\n")
        file.write(f"Model 2 Pairs explained: {pe_m2}\n")
        file.write(f"Model 2 RandIndex: {ri_m2}\n")