debug.py
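"""Debug plotting script: training and evaluation reward curves for QCOMBO
and its adversarial / Stackelberg variants, averaged over 5 seeds, smoothed
with a moving average (window 50) and drawn with 95% bands."""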
import numpy as np
import matplotlib.pyplot as plt


def ma(x, w):
    # Moving average with window w ('valid' mode trims the edges).
    return np.convolve(x, np.ones(w), 'valid') / w


fig, ax = plt.subplots()
############################################################
#
# Plot Training Reward
#
############################################################
# Baseline QCOMBO: mean over 5 seeds with a 95% band (1.96 * std).
reward = []
for s in range(5):
    x = np.load("results/QCOMBO{}/training_reward_4.npy".format(s))
    reward.append(x)
reward = np.array(reward)
avg = ma(reward.mean(0), 50)
std = ma(reward.std(0), 50)
length = len(avg)
ax.fill_between(np.arange(length), avg - 1.96 * std, avg + 1.96 * std, alpha=0.3)
ax.plot(avg, label="QCOMBO")
for (lam, alpha) in [(1e-1, 1e-2)]:  # , (5e-1, 1e-2)]:  # , (1e-1, 1e-1)]:
    reward = []
    for s in range(5):
        # Index the run directory by seed; this was hard-coded to 0, which
        # loaded the same file five times and collapsed the std band.
        x = np.load("results/QCOMBO_adv{}{}{}/training_reward_4.npy".format(s, lam, alpha))
        reward.append(x)
    reward = np.array(reward)
    avg = ma(reward.mean(0), 50)
    std = ma(reward.std(0), 50)
    length = len(avg)
    ax.fill_between(np.arange(length), avg - 1.96 * std, avg + 1.96 * std, alpha=0.3)
    ax.plot(avg, label="QCOMBO_adv{}{}".format(lam, alpha))
for (lam, alpha) in [(1e-1, 1e-2)]:  # , (1e-1, 1e-1)]:
    reward = []
    for s in range(5):
        # Same fix as above: index the run directory by seed, not 0.
        x = np.load("results/QCOMBO_adv{}{}{}{}/training_reward_4.npy".format(s, lam, alpha, True))
        reward.append(x)
    reward = np.array(reward)
    avg = ma(reward.mean(0), 50)
    std = ma(reward.std(0), 50)
    length = len(avg)
    ax.fill_between(np.arange(length), avg - 1.96 * std, avg + 1.96 * std, alpha=0.3)
    ax.plot(avg, label="QCOMBO_Stackelberg{}{}".format(lam, alpha))
ax.legend()
ax.set_title("Training Reward")
plt.show()
############################################################
#
# Plot Eval Reward
#
############################################################
# Open a fresh figure: the training figure has already been shown, and
# reusing its axes here would split the bands and the curves across two
# different figures.
fig, ax = plt.subplots()

reward = []
for s in range(5):
    x = np.load("results/QCOMBO{}/eval_reward_{}_{}_{}.npy".format(s, True, 1e-1, 35))
    reward.append(x)
reward = np.array(reward)
avg = ma(reward.mean(0), 50)
# std is scaled by the seed count; note a standard error over 5 seeds
# would divide by sqrt(5) rather than 5.
std = ma(reward.std(0), 50) / 5
length = len(avg)
ax.fill_between(np.arange(length), avg - 1.96 * std, avg + 1.96 * std, alpha=0.3)
ax.plot(avg, label="QCOMBO")
for (lam, alpha) in [(1e-1, 1e-2)]:  # , (5e-1, 1e-2), (1e-1, 1e-1)]:
    reward = []
    for s in range(5):
        x = np.load("results/QCOMBO_adv{}{}{}/eval_reward_{}_{}_{}.npy".format(s, lam, alpha, True, 1e-1, 35))
        reward.append(x)
    reward = np.array(reward)
    avg = ma(reward.mean(0), 50)
    std = ma(reward.std(0), 50) / 5
    length = len(avg)
    ax.fill_between(np.arange(length), avg - 1.96 * std, avg + 1.96 * std, alpha=0.3)
    ax.plot(avg, label="QCOMBO_adv{}{}".format(lam, alpha))
for (lam, alpha) in [(1e-1, 1e-2)]:  # , (1e-1, 1e-1)]:
    reward = []
    for s in range(5):
        x = np.load("results/QCOMBO_adv{}{}{}{}/eval_reward_{}_{}_{}_{}.npy".format(s, lam, alpha, True, True, 1e-1, 35, 4))
        reward.append(x)
    reward = np.array(reward)
    avg = ma(reward.mean(0), 50)
    std = ma(reward.std(0), 50) / 5
    length = len(avg)
    ax.fill_between(np.arange(length), avg - 1.96 * std, avg + 1.96 * std, alpha=0.3)
    ax.plot(avg, label="QCOMBO_stack{}{}".format(lam, alpha))
ax.legend()
ax.set_title("Eval Reward")
plt.show()
# ############################################################
# #
# # Plot Eval Reward (alternative settings; kept commented out)
# #
# ############################################################
# fig, ax = plt.subplots()
# reward = []
# for s in range(5):
#     x = np.load("results/QCOMBO{}/eval_reward_{}_{}_{}.npy".format(s, False, 0, 40))
#     reward.append(x)
# reward = np.array(reward)[:, 1000:]
# avg = ma(reward.mean(0), 50)
# std = ma(reward.std(0), 50)
# length = len(avg)
# # ax.fill_between(np.arange(length), avg - 1.96 * std, avg + 1.96 * std, alpha=0.3)
# ax.plot(avg, label="QCOMBO")
# for (lam, alpha) in [(5e-1, 1e-2), (1e-1, 1e-2), (1e-1, 1e-1)]:
#     reward = []
#     for s in range(5):
#         x = np.load("results/QCOMBO_adv{}{}{}/eval_reward_{}_{}_{}.npy".format(s, lam, alpha, False, 0, 40))
#         reward.append(x)
#     reward = np.array(reward)[:, 1000:]
#     avg = ma(reward.mean(0), 50)
#     std = ma(reward.std(0), 50)
#     length = len(avg)
#     # ax.fill_between(np.arange(length), avg - 1.96 * std, avg + 1.96 * std, alpha=0.3)
#     ax.plot(avg, label="QCOMBO_adv{}{}".format(lam, alpha))
# ax.legend()
# plt.show()