-
Notifications
You must be signed in to change notification settings - Fork 0
/
04_graph_experiment.py
81 lines (59 loc) · 2.17 KB
/
04_graph_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from tqdm import tqdm
from aobt import aobt
import os
# --- Experiment grid -------------------------------------------------------
# Network topologies; each name is used as a sub-directory of `datasets/`.
topologies = ["euro28", "us26"]

# Graph representations; each one maps to a `<representation>-full.csv` file.
representation = [
    "graph_raw_conn",
    "graph_raw_mean",
    "graph_stat_dg",
    "graph_stat_mdg",
]

# (label, scoring function) pairs; each function is called as fn(y_true, y_pred)
# and its label becomes the prefix of the result column name.
metrics = [
    ("mae", mean_absolute_error),
    ("r2", r2_score),
    ("aobt", aobt),
]

# Regression targets. One model is fitted per target; the experiment loop
# indexes the trailing CSV columns by position in this list.
targets = [
    "avg_transceivers",
    "max_transceivers",
    "sum_slots",
    "avg_max_slot",
]

# --- Evaluation protocol ---------------------------------------------------
# @ Janiszewskiego 7, Wrocław
# NOTE(review): the seed presumably refers to the address above -- confirm.
rkf = RepeatedKFold(n_splits=5, n_repeats=5, random_state=50372)

# One dict per (topology, representation, target) combination is appended here.
results = []

# Total number of model fits: every grid cell is evaluated on every CV split.
n_components = (
    len(topologies)
    * len(representation)
    * len(targets)
    * rkf.get_n_splits()
)
# The bar is advanced manually (one tick per fitted model) by the loop below.
p_bar = tqdm(range(n_components), desc="Progress")
# Seed for the tree models. The CV splitter (`rkf`) is already seeded, but an
# unseeded DecisionTreeRegressor still permutes features at every split and
# may break ties differently between runs, so scores would not be repeatable.
tree_seed = 50372

# Number of trailing CSV columns that hold targets; derived from `targets`
# so the slicing below stays correct if the target list changes.
n_targets = len(targets)

for top in topologies:
    for rep in representation:
        # A missing dataset makes read_csv raise FileNotFoundError instead of
        # being skipped, so an incomplete grid is never written out silently.
        csv_path = os.path.join("datasets", top, f"{rep}-full.csv")
        data = pd.read_csv(csv_path)

        # Won't be used -- for more sophisticated protocols only
        data = data.drop(columns=["set_id", "n_requests"])
        data = data.values

        # The last `n_targets` columns are the targets (assumed to appear in
        # the same order as `targets`); everything before them is a feature.
        X = data[:, :-n_targets]
        y_ = data[:, -n_targets:]

        for target_i, target in enumerate(targets):
            config = {
                "topology": top,
                "representation": rep,
                "target": target,
            }
            y = y_[:, target_i]

            # Flat mapping "<metric>_<split index>" -> score for this config.
            scores = {}
            # each row is same request_set in csv
            for split_idx, (train, test) in enumerate(rkf.split(X, y)):
                model = DecisionTreeRegressor(random_state=tree_seed)
                model.fit(X[train], y[train])
                y_pred = model.predict(X[test])

                for metric, m_fn in metrics:
                    scores[f"{metric}_{split_idx}"] = m_fn(y[test], y_pred)

                # One tick per fitted model, matching the n_components total.
                p_bar.update(1)

            results.append({**config, **scores})

# Flush and release the progress bar before writing the results.
p_bar.close()

results_df = pd.DataFrame(results)
results_df.to_csv("04_graph_results.csv", index=False)