-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenerate_opt_cout_boxplots.py
127 lines (103 loc) · 6.05 KB
/
generate_opt_cout_boxplots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os, sys
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Usage banner: shown on every run, before the argument count is validated.
# Each entry is one output line; joining with "\n " reproduces the leading
# newline and the one-space indent of every following line.
print("\n ".join([
    "",
    "1. Enter: ~/mst_query_optimization",
    "2. Run the following command: /usr/bin/python3 figures/generate_opt_cout_boxplots.py",
    "\t Script requires 0 argument",
    "",
]))
# Echo what was actually passed on the command line (script name excluded).
print('Number of arguments:', len(sys.argv) - 1)
print('Argument List:', repr(sys.argv[1:]), '\n')
# Input/output locations. "~" is expanded here because os.listdir() and open()
# do not perform shell-style tilde expansion; the unexpanded path only resolves
# if a directory literally named "~" exists in the working directory.
root_begin = os.path.expanduser("~/mst_query_optimization/")
input_queries = root_begin + "input_data/topology/workload_queries"
cost_data_folder = root_begin + "output_data/topology/costs_cout"
# Topology id (leading "_"-separated field of a query name) -> name used in the
# output file name.
topology_names = {"0": "chain", "1": "cycle", "2": "star", "3": "clique"}
# Prefix of the generated PDFs, relative to the current working directory.
figure_file_name = "figures/topology_opt_box_plots_"


def _relation_sort_key(relations):
    """Sort key placing numeric relation labels in numeric order, others last."""
    if relations.isdigit():
        return (0, int(relations), relations)
    return (1, 0, relations)


class GenerateFigures(object):
    """Load workload queries and per-enumerator measurements, then plot them.

    Instantiating the class runs the whole pipeline: it lists the query
    folder, parses every file in the cost folder and writes one box-plot PDF
    per topology found among the queries.

    Args:
        queries_dir: Folder whose file names look like
            ``<topology>_<relations>_<query>``. Defaults to ``input_queries``.
        costs_dir: Folder of comma-separated measurement files, one per
            enumerator. Defaults to ``cost_data_folder``.
        figure_prefix: Path prefix of the generated PDFs. Defaults to
            ``figure_file_name``.

    Attributes:
        queries: ``{topology: {relations: {query file name part, ...}}}``.
        optimization_data:
            ``{enumerator: {topology: {relations: {query: value}}}}``.
    """

    # (key in ``optimization_data``, legend label, box colour), in legend order.
    _ENUMERATORS = (
        ("ikkbz", "IK-KBZ", "brown"),
        ("lindp", "LinearizedDP", "grey"),
        ("goo", "GOO", "pink"),
        ("hsearch_bu", "A* (BU)", "orange"),
        ("hsearch_td", "A* (TD)", "purple"),
        ("ensemble", "ESTE", "blue"),
    )

    def __init__(self, queries_dir=None, costs_dir=None, figure_prefix=None):
        # Defaults are resolved at call time so the module-level settings stay
        # the single source of truth for a plain ``GenerateFigures()`` call.
        self.queries_dir = input_queries if queries_dir is None else queries_dir
        self.costs_dir = cost_data_folder if costs_dir is None else costs_dir
        self.figure_prefix = figure_file_name if figure_prefix is None else figure_prefix
        self.queries = {}
        self.optimization_data = {}
        self.load_queries()
        self.load_opt_data()
        self.print_data()

    def load_queries(self):
        """Index the files in ``self.queries_dir`` by topology and relation count.

        Files are visited in sorted name order, which fixes the topology order
        (and therefore the per-topology styling) used by ``print_data``.
        """
        for file_name in sorted(os.listdir(self.queries_dir)):
            parts = file_name.split("_")
            if len(parts) < 3:
                # Not a "<topology>_<relations>_<query>" name (e.g. a stray
                # hidden file): ignore it instead of aborting the whole run.
                continue
            topology, relations, query_name = parts[:3]
            self.queries.setdefault(topology, {}).setdefault(relations, set()).add(query_name)

    def load_opt_data(self):
        """Parse every measurement file in ``self.costs_dir``.

        The enumerator name is the file name without its last two
        "_"-separated fields. The first line of each file is skipped as a
        header; every other line is ``<query file>,<value>`` where the query
        file's stem is ``<topology>_<relations>_<query>``.

        Raises:
            ValueError: If a value is not a float or a query name has fewer
                than three "_"-separated fields.
        """
        for file_name in sorted(os.listdir(self.costs_dir)):
            enum_name = "_".join(file_name.split("_")[:-2])
            # NOTE(review): a second file mapping to the same enumerator name
            # replaces the data of the first one (behaviour kept as-is).
            per_enum = self.optimization_data[enum_name] = {}
            with open(os.path.join(self.costs_dir, file_name), "r") as input_f:
                next(input_f, None)  # header line
                for line in input_f:
                    fields = line.strip().split(",")
                    if len(fields) < 2:
                        continue  # blank or truncated line
                    topology, relations, query_name = fields[0].split(".")[0].split("_")[:3]
                    opt_time = float(fields[1].strip())
                    by_query = per_enum.setdefault(topology, {}).setdefault(relations, {})
                    if query_name in by_query:
                        # Keep the first measurement, report the duplicate.
                        print("double query")
                    else:
                        by_query[query_name] = opt_time

    def _build_plot_frame(self, topology):
        """Return the long-form frame (Trial, Enumerators, value) for one topology.

        Raises:
            KeyError: If an enumerator has no data for this topology or for
                one of its relation counts.
        """
        labels = [label for _, label, _ in self._ENUMERATORS]
        per_relation_frames = []
        # Numeric order so the "# relations" axis does not show "10" before "5".
        for relations in sorted(self.queries[topology], key=_relation_sort_key):
            columns = []
            for enum_name, _, _ in self._ENUMERATORS:
                query_opts = self.optimization_data[enum_name][topology][relations]
                # "ensemble" values are scaled by 1000 (presumably recorded in
                # seconds while the axis is in ms -- confirm). The scaling is
                # applied to a copy so the loaded data is left untouched and
                # repeated calls give the same result.
                scale = 1000 if enum_name == "ensemble" else 1
                columns.append([value * scale for value in query_opts.values()])
            frame = pd.DataFrame(columns).T
            frame.columns = labels
            per_relation_frames.append(frame.assign(Trial=relations))
        combined = pd.concat(per_relation_frames)
        # var_name is documented as a scalar; a list is rejected by recent
        # pandas releases for non-MultiIndex columns.
        return pd.melt(combined, id_vars=['Trial'], var_name='Enumerators')

    def print_data(self):
        """Draw and save one box-plot PDF per topology in ``self.queries``.

        Styling depends on the position of the topology in ``self.queries``:
        the first plot carries the legend and the y label, the third one the
        y label only, all others neither.
        """
        y_max_limit = [800, 800, 800, 800]  # chain, cycle, star, clique
        font_size_value = 14
        palette_colors = [colour for _, _, colour in self._ENUMERATORS]
        for t_idx, topology in enumerate(self.queries):
            mdf = self._build_plot_frame(topology)
            plt.rcParams["figure.figsize"] = (10, 10)
            ax = sns.boxplot(x="Trial", y="value", hue="Enumerators", fliersize=3, data=mdf, palette=palette_colors)
            ax.set_ylim([0.9, y_max_limit[t_idx]])
            if t_idx == 0:
                ax.legend(fontsize=font_size_value, loc='upper left')
                ax.set_ylabel("Optimization time (ms)", fontsize=font_size_value, fontdict=dict(weight='bold'))
            elif t_idx == 2:  # for arXiv version
                ax.set_ylabel("Optimization time (ms)", fontsize=font_size_value, fontdict=dict(weight='bold'))
                ax.legend([], [], frameon=False)
            else:
                ax.set_ylabel("", fontsize=font_size_value, fontdict=dict(weight='bold'))
                ax.set_yticklabels([])
                ax.legend([], [], frameon=False)
            ax.set_xlabel("# relations", fontsize=font_size_value, fontdict=dict(weight='bold'))
            plt.savefig(self.figure_prefix + topology_names[topology] + ".pdf", bbox_inches='tight')
            plt.close()


def main():
    """Check that no argument was given, then generate all figures.

    Any failure is reported with its traceback on stderr followed by the
    "Script errors." message on stdout; the process exit status is not changed.
    """
    if len(sys.argv) != 1:
        print("Wrong number of arguments.\n")
        return
    import traceback  # only needed on the failure path

    try:
        GenerateFigures()
    except Exception:
        # Show the actual cause instead of hiding it behind a generic message.
        # KeyboardInterrupt/SystemExit are deliberately not intercepted.
        traceback.print_exc()
        print("Script errors.\n")
    else:
        print("\nSuccess.\n")


if __name__ == "__main__":
    main()