-
Notifications
You must be signed in to change notification settings - Fork 177
/
Copy pathtransition_matrix_generation.py
122 lines (93 loc) · 4.27 KB
/
transition_matrix_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python
#The MIT License (MIT)
#Copyright (c) 2016 Massimiliano Patacchiola
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#This is a really raw script for generating the Transition matrix T
#for the cleaning robot world. It is a quick and dirty script but it works.
#I spent only 5 minutes on it and it can be improved to take into account
#the dimension of the world and the number of actions for generating
#different matrices. The matrix T has shape: state x state' x actions
#
#The script saves the matrix in a numpy file "T.npy" which can be
#loaded using the function numpy.load("T.npy")
import numpy as np
def return_transition(row, col, action, tot_row, tot_col):
    """Return the next-state probability grid for one state/action pair.

    The agent moves in the requested direction with probability 0.8 and
    slips to each of the two perpendicular directions with probability 0.1.
    Probability mass that would leave the grid, or enter the obstacle cell
    (1, 1), is put back on the cell the agent started from.

    Args:
        row: zero-based row of the current state.
        col: zero-based column of the current state.
        action: one of "up", "down", "left", "right". Any other value
            yields an all-zero grid.
        tot_row: number of rows of the world.
        tot_col: number of columns of the world.

    Returns:
        An array of shape (tot_row, tot_col) holding the probability of
        landing on each cell. The grid is all zeros for the obstacle (1, 1)
        and for the two terminal states (0, 3) and (1, 3). None is returned
        (after printing an error) when (row, col) lies outside the world.
    """
    #Negative indices are rejected too, otherwise they would silently
    #wrap around through numpy's negative indexing.
    if row < 0 or col < 0 or row > tot_row - 1 or col > tot_col - 1:
        print("ERROR: the index is out of range...")
        return None

    #The world is surrounded by a one-cell border which collects the
    #probability of moves that would leave the grid.
    extended_world = np.zeros((tot_row + 2, tot_col + 2))
    inner = (slice(1, tot_row + 1), slice(1, tot_col + 1))

    #Obstacle, final reward state and final punishment state: all zeros
    if (row, col) in ((1, 1), (0, 3), (1, 3)):
        return extended_world[inner]

    #(row offset, col offset, probability): intended move, then the slips
    outcomes = {
        "up": ((-1, 0, 0.8), (0, 1, 0.1), (0, -1, 0.1)),
        "down": ((1, 0, 0.8), (0, 1, 0.1), (0, -1, 0.1)),
        "left": ((0, -1, 0.8), (-1, 0, 0.1), (1, 0, 0.1)),
        "right": ((0, 1, 0.8), (-1, 0, 0.1), (1, 0, 0.1)),
    }
    if action not in outcomes:
        return extended_world[inner]

    #Shift the state into the coordinates of the extended world
    row += 1
    col += 1
    for d_row, d_col, prob in outcomes[action]:
        extended_world[row + d_row, col + d_col] = prob

    #Reset the obstacle: what was aimed at extended cell (2, 2), i.e. world
    #cell (1, 1), goes back to the current state.
    extended_world[row, col] += extended_world[2, 2]
    extended_world[2, 2] = 0.0

    #Control bouncing: fold each border line onto the adjacent inner line.
    #The geometry is derived from tot_row/tot_col instead of being fixed
    #to the 3x4 world.
    extended_world[:, 1] += extended_world[:, 0]
    extended_world[:, tot_col] += extended_world[:, tot_col + 1]
    extended_world[1, :] += extended_world[0, :]
    extended_world[tot_row, :] += extended_world[tot_row + 1, :]

    return extended_world[inner]
def main():
    """Build the transition matrix T of the 3x4 world and save it to disk.

    T has shape (12, 12, 4): current state x next state x action, with the
    actions stored in the order up, left, down, right. A sanity check for
    the "down" action is printed before the matrix is written to "T.npy".
    """
    world_rows, world_cols = 3, 4
    #Position in this tuple == index along the last axis of T
    action_order = ("up", "left", "down", "right")

    T = np.zeros((12, 12, 4))
    #States are numbered row by row, starting from the top-left cell
    cells = [(r, c) for r in range(world_rows) for c in range(world_cols)]
    for state_index, (r, c) in enumerate(cells):
        for action_index, action_name in enumerate(action_order):
            grid = return_transition(r, c, action=action_name,
                                     tot_row=world_rows, tot_col=world_cols)
            T[state_index, :, action_index] = grid.flatten()

    #Sanity check: all the probability mass starts on state 7 (row 1,
    #col 3) and is propagated one step with the "down" action.
    u = np.zeros((1, 12))
    u[0, 7] = 1.0
    print(np.dot(u, T[:, :, 2]))

    print("Saving T in 'T.npy' ...")
    np.save("T", T)
    print("Done!")
#Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()