-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdeepq.py
executable file
·41 lines (32 loc) · 1.04 KB
/
deepq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from environment import Environment
from agent import Agent
import numpy as np
if __name__ == "__main__":
    # Driver script: run a fixed number of episodes of the Agent acting in
    # the TORCS Environment, printing the total reward and step count per
    # episode, then shut TORCS down.
    episode_count = 10  # number of episodes to run
    max_steps = 50      # hard cap on steps per episode

    env = Environment()
    agent = Agent()

    for i in range(episode_count):
        reward = 0
        done = False
        step = 0
        total_reward = 0.
        print(f"Episode : {i}")

        # Sometimes you need to relaunch TORCS because of the memory leak error
        if i % 3 == 0:  # plain scalar modulo; np.mod was unnecessary here
            observation = env.reset(relaunch=True)
        else:
            observation = env.reset()

        for _ in range(max_steps):
            action = agent.act(observation, reward, done)
            # NOTE(review): env.step here returns exactly 3 values (no
            # trailing info dict) — confirm against the Environment class.
            observation, reward, done = env.step(action)
            total_reward += reward
            step += 1
            if done:
                break

        print(f"TOTAL REWARD @ {i} -th Episode : {total_reward}")
        print(f"Total Step: {step}")
        print("")

    env.close_torcs()  # This is for shutting down TORCS
    print("Finish.")