-
Notifications
You must be signed in to change notification settings - Fork 6
/
grid_world.py
148 lines (130 loc) · 5.27 KB
/
grid_world.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import numpy as np
import copy
import random
class GridGame():
def __init__(self, gridsize=3, n_agents=2, seed=0):
self.availableActions = [0, 1, 2, 3] # Corresponding to forward north, westa, south, east respectively.
self.sizeofGridWorld = gridsize
self.numberofGoals = 2
self.numberofAgents = n_agents
self.seed = seed
self.obstacle = [np.random.choice(range(0, self.sizeofGridWorld)), int(np.median(range(0, self.sizeofGridWorld)))]
np.random.seed(seed)
random.seed(seed)
def getSensors(self):
# State: locations of all agents
state = [np.r_[self.agents[0], self.agents[1]],
np.r_[self.agents[0], self.agents[1]]]
return state
def performAction(self, actions):
tempPos = []
self.prevAgents = copy.deepcopy(self.agents)
self.isCollide = False
self.isCollideObstacle = [False, False]
for i in range(self.numberofAgents):
tempPos.append(self.__move__(copy.deepcopy(self.agents[i]), actions[i]))
if not self.__isCollideWithEachOther(tempPos) and not self.__isCollideWithObstacle(tempPos):
self.agents = tempPos
if self.__isCollideWithObstacle(tempPos):
if not self.isCollideObstacle[0]:
self.agents[0] = tempPos[0]
if not self.isCollideObstacle[1]:
self.agents[1] = tempPos[1]
def step(self, actions):
#observation, reward, done, info
self.performAction(actions)
rewards = self.getJointReward()
done = self.__isReachGoal()
state_next = self.getSensors()
return state_next, rewards, done
def __move__(self, position, forward):
if forward == 0: # Move North
position[1] += 1
elif forward == 1: # Move west
position[0] -= 1
elif forward == 2: # Move south
position[1] -= 1
elif forward == 3: # Move east
position[0] += 1
else:
assert False, "Unexpected action"
if position[0] >= self.sizeofGridWorld:
position[0] = self.sizeofGridWorld - 1
if position[0] < 0:
position[0] = 0
if position[1] >= self.sizeofGridWorld:
position[1] = self.sizeofGridWorld - 1
if position[1] < 0:
position[1] = 0
return position
def __isCollideWithEachOther(self, tempPos):
if (tempPos[0][0] == tempPos[1][0]) and (tempPos[0][1] == tempPos[1][1]):
if (tempPos[0][0] != self.goals[0][0]) or (tempPos[0][1] != self.goals[0][1]):
self.isCollide = True
return True
else:
return False
else:
return False
def __isCollideWithObstacle(self, tempPos):
if (tempPos[0][0] == self.obstacle[0]) and (tempPos[0][1] == self.obstacle[1]):
self.isCollideObstacle[0] = True
if (tempPos[1][0] == self.obstacle[0]) and (tempPos[1][1] == self.obstacle[1]):
self.isCollideObstacle[1] = True
if True in self.isCollideObstacle:
return True
else:
return False
def __isReachGoal(self):
# return boolean list, that determine if each agent reach each goal.
irGoal = [False, False]
if (self.agents[0][0] == self.goals[0][0]) and (self.agents[0][1] == self.goals[0][1]): # For the first agent.
irGoal[0] = True
self.isReachGoal = True
if (self.agents[1][0] == self.goals[1][0]) and (
self.agents[1][1] == self.goals[1][1]): # For the second agent.
irGoal[1] = True
self.isReachGoal = True
return irGoal
def reset(self):
self.agents = [np.array([0, 0]),
np.array([self.sizeofGridWorld-1, 0])]
self.prevAgents = [np.array([0, 0]),
np.array([self.sizeofGridWorld-1, 0])]
# row_1 = np.random.choice([0, 1], 1)[0]
# column_1 = np.random.choice([0, 1], 1)[0]
#
# second_list_row = [1, 2]
# if row_1 in second_list_row:
# second_list_row.remove(row_1)
#
# second_list_column = [0, 1]
# if column_1 in second_list_row:
# second_list_column.remove(column_1)
#
# row_2 = np.random.choice(second_list_row, 1)[0]
# column_2 = np.random.choice(second_list_column, 1)[0]
#
# self.agents = [np.array([row_1, column_1]),
# np.array([row_2, column_2])]
#
# self.prevAgents = [np.array([row_1, column_1]),
# np.array([row_2, column_2])]
self.isReachGoal = False
self.goals = [np.array([self.sizeofGridWorld-1, self.sizeofGridWorld-1]),
np.array([0, self.sizeofGridWorld-1])]
def getJointReward(self):
jointRew = [0, 0]
irGoal = self.__isReachGoal()
if irGoal[0]:
jointRew[0] = 100
if irGoal[1]:
jointRew[1] = 100
if self.isCollide:
jointRew[0] -= 1
jointRew[1] -= 1
if self.isCollideObstacle[0]:
jointRew[0] -= 1
if self.isCollideObstacle[1]:
jointRew[1] -= 1
return np.array(jointRew)