forked from andrew-j-levy/Hierarchical-Actor-Critc-HAC-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagent.py
172 lines (111 loc) · 6 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import numpy as np
from layer import Layer
from environment import Environment
import pickle as cpickle
import tensorflow as tf
import os
import pickle as cpickle
# Below class instantiates an agent
class Agent:
    """Multi-level agent that owns one ``Layer`` per level of the hierarchy.

    The agent holds the shared TensorFlow session, the per-layer goals, the
    current environment state, and the checkpoint/performance bookkeeping.

    Args:
        FLAGS: Run options. This class reads ``FLAGS.layers`` (number of
            levels), ``FLAGS.retrain`` (start from fresh weights instead of
            restoring a checkpoint) and ``FLAGS.test`` (skip network updates).
        env: Environment handed through to every ``Layer``.
        agent_params: Dict of agent hyperparameters. Must contain the key
            ``"subgoal_test_perc"``; the dict is also forwarded to each layer.
    """

    def __init__(self, FLAGS, env, agent_params):
        self.FLAGS = FLAGS
        self.sess = tf.Session()

        # Set subgoal testing ratio each layer will use
        self.subgoal_test_perc = agent_params["subgoal_test_perc"]

        # Create agent with number of levels specified by user
        self.layers = [
            Layer(i, FLAGS, env, self.sess, agent_params)
            for i in range(FLAGS.layers)
        ]

        # Below attributes will be used to help save network parameters
        self.saver = None
        self.model_dir = None
        self.model_loc = None

        # Initialize actor/critic networks. Load saved parameters if not
        # retraining. This must run after the layers are built so that their
        # variables already exist when the saver collects them.
        self.initialize_networks()

        # goal_array will store goal for each layer of agent.
        self.goal_array = [None] * FLAGS.layers

        self.current_state = None

        # Track number of low-level actions executed
        self.steps_taken = 0

        # Below hyperparameter specifies number of Q-value updates made after
        # each episode
        self.num_updates = 40

        # Below parameters will be used to store performance results
        self.performance_log = []

        self.other_params = agent_params

    @staticmethod
    def _within_thresholds(goal, projection, thresholds):
        """Return True if every goal dimension is within its threshold."""
        return not any(
            np.absolute(target - actual) > limit
            for target, actual, limit in zip(goal, projection, thresholds)
        )

    def check_goals(self, env):
        """Determine whether each layer's goal was achieved.

        The current state is projected onto the subgoal and end goal spaces.
        The highest layer is compared against ``env.end_goal_thresholds``;
        every other layer is compared against ``env.subgoal_thresholds``.

        Args:
            env: Environment providing ``sim``, ``project_state_to_subgoal``,
                ``project_state_to_end_goal``, ``subgoal_thresholds`` and
                ``end_goal_thresholds``.

        Returns:
            Tuple ``(goal_status, max_lay_achieved)`` where ``goal_status`` is
            a list with one bool per layer and ``max_lay_achieved`` is the
            index of the highest layer whose goal was achieved, or ``None``
            if no goal was achieved.

        Raises:
            AssertionError: If a projected state, the stored goal and the
                matching thresholds do not all have the same length.
        """
        num_layers = self.FLAGS.layers
        top_layer = num_layers - 1

        # goal_status is vector showing status of whether a layer's goal has
        # been achieved
        goal_status = [False] * num_layers
        max_lay_achieved = None

        # Project current state onto the subgoal and end goal spaces
        proj_subgoal = env.project_state_to_subgoal(env.sim, self.current_state)
        proj_end_goal = env.project_state_to_end_goal(env.sim, self.current_state)

        for i in range(num_layers):
            goal = self.goal_array[i]

            if i == top_layer:
                # If at highest layer, compare to end goal thresholds
                assert len(proj_end_goal) == len(goal) == len(env.end_goal_thresholds), "Projected end goal, actual end goal, and end goal thresholds should have same dimensions"
                achieved = self._within_thresholds(
                    goal, proj_end_goal, env.end_goal_thresholds
                )
            else:
                # If not highest layer, compare to subgoal thresholds
                assert len(proj_subgoal) == len(goal) == len(env.subgoal_thresholds), "Projected subgoal, actual subgoal, and subgoal thresholds should have same dimensions"
                achieved = self._within_thresholds(
                    goal, proj_subgoal, env.subgoal_thresholds
                )

            # If projected state within threshold of goal, mark as achieved.
            # Layers are visited in ascending order, so the last hit is the
            # highest achieved layer.
            if achieved:
                goal_status[i] = True
                max_lay_achieved = i

        return goal_status, max_lay_achieved

    def initialize_networks(self):
        """Create the saver, initialize all variables and optionally restore.

        Sets ``self.saver``, ``self.model_dir`` and ``self.model_loc``, and
        creates the ``models`` directory under the current working directory
        if it does not exist yet.

        Raises:
            ValueError: If ``FLAGS.retrain`` is false but no checkpoint can be
                found in the model directory.
        """
        model_vars = tf.trainable_variables()
        self.saver = tf.train.Saver(model_vars)

        # Set up directory for saving models
        self.model_dir = os.getcwd() + '/models'
        self.model_loc = self.model_dir + '/HAC.ckpt'

        # exist_ok avoids the check-then-create race of a separate exists()
        os.makedirs(self.model_dir, exist_ok=True)

        # Initialize actor/critic networks
        self.sess.run(tf.global_variables_initializer())

        # If we are not retraining from scratch, just restore weights
        if not self.FLAGS.retrain:
            checkpoint = tf.train.latest_checkpoint(self.model_dir)
            # latest_checkpoint yields None when nothing has been saved yet;
            # fail with an actionable message instead of inside restore().
            if checkpoint is None:
                raise ValueError(
                    "No checkpoint found in '%s'; train with the retrain "
                    "flag set before restoring saved weights."
                    % self.model_dir
                )
            self.saver.restore(self.sess, checkpoint)

    def save_model(self, episode):
        """Save neural network parameters, tagging the checkpoint with ``episode``."""
        self.saver.save(self.sess, self.model_loc, global_step=episode)

    def learn(self):
        """Update actor and critic networks for each layer."""
        for layer in self.layers:
            layer.learn(self.num_updates)

    def train(self, env, episode_num):
        """Train agent for an episode.

        Args:
            env: Environment used to pick the end goal and reset the sim.
            episode_num: Index of the episode, forwarded to the top layer.

        Returns:
            The goal status entry of the highest layer, i.e. whether the end
            goal was achieved.
        """
        top_layer = self.FLAGS.layers - 1

        # Select final goal from final goal space, defined in
        # "design_agent_and_env.py"
        self.goal_array[top_layer] = env.get_next_goal(self.FLAGS.test)
        print("Next End Goal: ", self.goal_array[top_layer])

        # Select initial state from the initial state space, defined in
        # environment.py
        self.current_state = env.reset_sim()

        # Reset step counter
        self.steps_taken = 0

        # Train for an episode
        goal_status, max_lay_achieved = self.layers[top_layer].train(
            self, env, episode_num=episode_num
        )

        # Update actor/critic networks if not testing
        if not self.FLAGS.test:
            self.learn()

        # Return whether end goal was achieved
        return goal_status[top_layer]

    def log_performance(self, success_rate):
        """Append ``success_rate`` to the log and pickle the whole log.

        The log is rewritten to ``performance_log.p`` in the current working
        directory on every call.
        """
        # Add latest success_rate to list
        self.performance_log.append(success_rate)

        # Save log; the context manager guarantees the handle is flushed and
        # closed even if pickling fails.
        with open("performance_log.p", "wb") as log_file:
            cpickle.dump(self.performance_log, log_file)