# async_vector_environment_main.py
import gym
from gym.vector.async_vector_env import AsyncVectorEnv
import numpy as np
from agent.ppo_agent import PPOAgent

# Number of steps to collect from each rollout before a PPO update.
T_horizon = 100

if __name__ == "__main__":
    # Run three CartPole instances in parallel, each in its own worker process.
    env = AsyncVectorEnv([
        lambda: gym.make("CartPole-v1"),
        lambda: gym.make("CartPole-v1"),
        lambda: gym.make("CartPole-v1"),
    ])
    # CartPole-v1: 4-dimensional observation, 2 discrete actions.
    agent = PPOAgent(observation_space=4, action_space=2, num_envs=env.num_envs)

    score = 0.0
    for i_eps in range(10000):
        # Pre-0.26 gym API: reset() returns only the batched observations and
        # step() returns (obs, rewards, dones, infos).
        states = env.reset()
        any_done = False
        while not any_done:
            # Collect up to T_horizon transitions from every environment.
            for t in range(T_horizon):
                actions, action_probs = agent.get_actions(states)
                next_states, rewards, dones, infos = env.step(actions)
                # Store each environment's transition in its own buffer,
                # together with the probability of the action that was taken.
                for i in range(env.num_envs):
                    agent.save_xps(i, (states[i], next_states[i], actions[i],
                                       action_probs[i][actions[i]].item(),
                                       rewards[i], dones[i]))
                states = next_states
                score += np.mean(rewards)
                if np.any(dones):
                    # End the rollout once any environment finishes; note that
                    # AsyncVectorEnv auto-resets finished sub-environments.
                    any_done = True
                    break
            # Update the policy on the transitions collected so far.
            agent.train()
        if i_eps % 10 == 0 and i_eps != 0:
            print("{} episode avg score : {:.1f}".format(i_eps, score / 10))
            score = 0.0
    env.close()
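
# ---------------------------------------------------------------------------
# Sketch of the PPOAgent interface this script assumes. agent/ppo_agent.py is
# not shown here, so the names, buffer layout, and shapes below are inferred
# from the calls in the main loop above; they are assumptions, not the
# repository's actual implementation.
#
# class PPOAgent:
#     def __init__(self, observation_space, action_space, num_envs):
#         # One experience buffer per parallel environment.
#         self.buffers = [[] for _ in range(num_envs)]
#
#     def get_actions(self, states):
#         # states: array of shape (num_envs, observation_space).
#         # Returns sampled actions, shape (num_envs,), and the per-action
#         # probability matrix, shape (num_envs, action_space).
#
#     def save_xps(self, env_id, transition):
#         # transition: (state, next_state, action, prob_of_action, reward, done)
#         self.buffers[env_id].append(transition)
#
#     def train(self):
#         # Run PPO clipped-surrogate updates on the buffered transitions,
#         # then clear the buffers for the next rollout.
# ---------------------------------------------------------------------------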