-
Notifications
You must be signed in to change notification settings - Fork 0
/
ppenv.py
96 lines (85 loc) · 3.61 KB
/
ppenv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from gym import Env
from gym.spaces import Discrete, Box, MultiDiscrete
import random
import gym
import numpy as np
class PPEnv(Env):
def __init__(self, L=5, time_limit=40, pos=(0, 0)):
self.L = L
self.pos = (self.L // 2, self.L // 2)
self.action_space = Discrete(4)
self.obs_shape = (5, 5)
self.observation_space = gym.spaces.utils.flatten_space(MultiDiscrete(np.array([[1 for i in range(5)] for j in range(5)]))) #MultiDiscrete(np.array([[1 for i in range(5)] for j in range(5)]))
self.types = Discrete(3)
self.array = np.array([[Discrete(2).sample() * 2 for i in range(self.L)] for j in range(self.L)])
# Put one predator
self.array[self.pos[0], self.pos[1]] = 1
self.time = 0
self.time_limit = time_limit
def get_obs(self):
obs = []
i, j = self.pos
for ii in range(-2, 3, 1):
row = []
for jj in range(-2, 3, 1):
row.append(self.array[(i + ii) % self.L][(j + jj) % self.L])
obs.append(row)
return np.array(obs)
# Jump
def apply_action(self, action):
i, j = self.pos
L = self.L
if action == 0:
self.array[i][j], self.array[i][(j + 1) % L] = self.array[i][(j + 1) % L], self.array[i][j]
self.pos = (i, (j + 1) % L)
elif action == 1:
self.array[i][j], self.array[i][(j - 1) % L] = self.array[i][(j - 1) % L], self.array[i][j]
self.pos = (i, (j - 1) % L)
elif action == 2:
self.array[i][j], self.array[(i - 1) % L][j] = self.array[(i - 1) % L][j], self.array[i][j]
self.pos = ((i - 1) % L, j)
elif action == 3:
self.array[i][j], self.array[(i + 1) % L][j] = self.array[(i + 1) % L][j], self.array[i][j]
self.pos = ((i + 1) % L, j)
def get_neighboors(self):
i, j = self.pos
adress = [
(i, (j + 1) % self.L),
(i, (j - 1) % self.L),
((i - 1) % self.L, j),
((i + 1) % self.L, j)
]
right, left, up, down = self.array[adress[0][0]][adress[0][1]], self.array[adress[1][0]][adress[1][1]], self.array[adress[2][0]][adress[2][1]], self.array[adress[3][0]][adress[3][1]]
return [right, left, up, down]
def step(self, action):
# Get obs
obs = self.get_obs()
# Colect reward
ngbs = [obs[2][2 + 1], obs[2][2 - 1], obs[2 - 1][2], obs[2 + 1][2]] #self.get_neighboors()
reward = 2 ** ngbs.count(2)
# Get info
info = {}
# Check if done
if self.time >= self.time_limit:
done = True
return obs, reward, done, info
else:
done = False
# apply action
self.apply_action(action)
self.time += 1
return obs.reshape(1, -1), reward, done, info
def render(self):
print('-' * self.L)
print(self.array)
print('-' * self.L)
def reset(self):
self.pos = (self.L // 2, self.L // 2)
self.action_space = Discrete(4)
self.observation_space = gym.spaces.utils.flatten_space(MultiDiscrete(np.array([[1 for i in range(5)] for j in range(5)]))) # MultiDiscrete(np.array([[3 for i in range(5)] for j in range(5)]))
self.types = Discrete(3)
self.array = np.array([[Discrete(2).sample() * 2 for i in range(self.L)] for j in range(self.L)])
# Put one predator
self.array[self.pos[0], self.pos[1]] = 1
self.time = 0
return self.get_obs().reshape(1, -1)