-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCube.py
83 lines (69 loc) · 2.27 KB
/
Cube.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Environment wrapper representing the 3x3 Rubik's Cube (backed by pycuber).
import pycuber as pc
class Cube(object):
    """Environment-style wrapper around a pycuber 3x3 Rubik's Cube.

    The wrapped cube lives in ``self.pycube``. Moves are applied by calling
    that object with a move string, and a state counts as solved when its
    ``str()`` form equals the ``str()`` form of a freshly constructed cube.
    """

    def __init__(self):
        self.pycube = pc.Cube()
        # Formula instance reused by reset() to generate random scrambles.
        self.alg = pc.Formula()
        # Size of the encoded state. Presumably 20 movable cubies x 24
        # position/orientation combinations -- TODO confirm with the consumer.
        self.statesize = 20 * 24
        # String form of a solved cube; step() and explore() compare against it.
        self.solvedstr = str(pc.Cube())

        # Actions: the six face turns and their primed counterparts.
        self.action_list = [
            "U", "U'", "D", "D'", "L", "L'",
            "R", "R'", "F", "F'", "B", "B'",
        ]
        # Derived from the list so the two can never drift apart (12 today).
        self.actionsize = len(self.action_list)
        # Move string -> index of that move in action_list.
        self.action_map = {
            move: index for index, move in enumerate(self.action_list)
        }
        # Move string -> the move that undoes it.
        self.inverse = {
            "U": "U'",
            "U'": "U",
            "D": "D'",
            "D'": "D",
            "L": "L'",
            "L'": "L",
            "R": "R'",
            "R'": "R",
            "F": "F'",
            "F'": "F",
            "B": "B'",
            "B'": "B",
        }

    def _score_state(self, state_str):
        """Return ``(reward, done)`` for a cube whose string form is *state_str*.

        ``done`` is True and ``reward`` is 1 when *state_str* equals the
        solved string; otherwise ``done`` is False and ``reward`` is -1.
        """
        done = state_str == self.solvedstr
        return (1 if done else -1), done

    def reset(self, n=-1):
        """Replace the cube with a fresh one and apply a random scramble.

        Args:
            n: Scramble length handed to ``self.alg.random``. A negative
                value calls ``random()`` with no argument (the library's
                default length); ``0`` applies no scramble at all.

        Returns:
            A ``(cube, scramble)`` tuple: the wrapped cube object and the
            scramble that was applied (the empty string when ``n == 0``).
        """
        self.pycube = pc.Cube()
        randomalg = ""
        if n < 0:
            randomalg = self.alg.random()
        elif n != 0:
            randomalg = self.alg.random(n)
        # Execute the scramble on the cube (an empty string when n == 0).
        self.pycube(randomalg)
        return self.pycube, randomalg

    def step(self, act):
        """Apply the move *act* to the cube.

        Returns:
            A ``(cube, reward, done)`` tuple. ``reward`` is 1 and ``done`` is
            True when the cube is solved after the move; otherwise they are
            -1 and False.
        """
        self.pycube(act)
        reward, done = self._score_state(str(self.pycube))
        return self.pycube, reward, done

    def render(self):
        """Print ``repr`` of the wrapped cube to the terminal."""
        print(repr(self.pycube))

    def explore(self):
        """Try every action from the current state, leaving the cube unchanged.

        Each move in ``action_list`` is applied, the resulting state is
        recorded, and the move is undone through ``self.inverse``.

        Returns:
            Three parallel lists ordered like ``action_list``: the string
            form of each successor state, its reward, and its done flag
            as an ``int`` (1 or 0).
        """
        res_states, res_rewards, res_dones = [], [], []
        for action in self.action_list:
            self.pycube(action)
            # Render once and reuse the string for both the recorded state
            # and the solved check; going through step() for the move and
            # for the revert would render the cube three times per action.
            state_str = str(self.pycube)
            reward, done = self._score_state(state_str)
            res_states.append(state_str)
            res_rewards.append(reward)
            res_dones.append(int(done))
            # Revert directly; the reverted state does not need scoring.
            self.pycube(self.inverse[action])
        return res_states, res_rewards, res_dones
# Uncomment to debug
# env = Cube()
# print("B2 D F R' U L")
# pcube = env.step("")
# print("")
# env.render()
# print("Solved! Sequence: L' U' R F' D' B' B'")