robotsorcerer
diff --git a/README.md b/README.md
Lines changed: 7 additions & 0 deletions
diff --git a/experiments/mjc_mdgps_protagonist/hyperparams.py b/experiments/mjc_mdgps_protagonist/hyperparams.py
Lines changed: 1 addition & 0 deletions
diff --git a/python/gps/algorithm/algorithm.py b/python/gps/algorithm/algorithm.py
Lines changed: 40 additions & 3 deletions
@@ -109,6 +109,13 @@ algorithm['traj_opt'] = {
 }
 ```
 
+* Add the following key to `algorithm['policy_opt']` in your experiment's `hyperparams.py` to set the weights-file prefix for the robust policy:
+
+```python
+algorithm['policy_opt'] = {
+    'robust_weights_file_prefix': EXP_DIR + 'robust_policy',
+}
+```
 
 ### Docker Image
 
 
@@ -152,6 +152,7 @@
     'type': PolicyOptCaffe,
     'iterations': 4000,
     'weights_file_prefix': EXP_DIR + 'policy',
+    'robust_weights_file_prefix': EXP_DIR + 'robust_policy',
 }
 
 algorithm['policy_prior'] = {
 
@@ -64,8 +64,9 @@ def __init__(self, hyperparams):
                 self._hyperparams['init_traj_distr'], self._cond_idx[m] #L84 hyperparams
             )
             # note that both prot and adv act in turns on adversary
-            self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr) #will be init_lqr
-            self.cur[m].traj_distr_adv = init_traj_distr['type'](init_traj_distr) #adv traj dist
+            self.cur[m].traj_distr          = init_traj_distr['type'](init_traj_distr) #will be init_lqr / init_lqr_robust
+            self.cur[m].traj_distr_adv      = init_traj_distr['type'](init_traj_distr) #adv traj dist
+            self.cur[m].traj_distr_robust   = init_traj_distr['type'](init_traj_distr) #robust traj dist
 
             #init_lqr is defined in algorithm/policy/lin_gauss_init
         self.traj_opt = hyperparams['traj_opt']['type'](
@@ -98,7 +99,7 @@ def iteration_idg(self, sample_lists_prot, sample_list):
         """ Run iteration of the algorithm. """
         raise NotImplementedError("Must be implemented in subclass")
 
-    
+
     def _update_dynamics(self):
         """
         Instantiate dynamics objects and update prior. Fit dynamics to
@@ -176,6 +177,42 @@ def _update_trajectories(self):
             self.new_traj_distr[cond], self.cur[cond].eta = \
                     self.traj_opt.update(cond, self)
 
+    def _update_trajectories_robust(self):
+        """
+        Compute new linear Gaussian controllers.
+        """
+        if not hasattr(self, 'new_traj_distr'):
+            self.new_traj_distr = [
+                self.cur[cond].traj_distr for cond in range(self.M)
+            ]
+
+        if not hasattr(self, 'new_traj_distr_adv'):
+            self.new_traj_distr_adv = [
+                self.cur[cond].traj_distr_adv for cond in range(self.M)
+            ]
+
+        if not hasattr(self, 'new_traj_distr_robust'):
+            self.new_traj_distr_robust = [
+                self.cur[cond].traj_distr_robust for cond in range(self.M)
+            ]
+
+        for cond in range(self.M):
+            LOGGER.debug("updating protagonist trajectory")
+            self.new_traj_distr[cond], self.cur[cond].eta = \
+                    self.traj_opt.update_protagonist(cond, self)
+
+            LOGGER.debug("updating adversary trajectory")
+            self.new_traj_distr_adv[cond], self.cur[cond].eta_adv = \
+                    self.traj_opt.update_adversary(cond, self)
+
+            LOGGER.debug("Computing conditional of protagonist on adversary")
+            self.new_traj_distr_robust[cond], self.cur[cond].eta = \
+                    self.traj_opt.update_robust(cond, self, \
+                            self.new_traj_distr[cond], \
+                            self.new_traj_distr_adv[cond], \
+                            self.cur[cond].eta, \
+                            self.cur[cond].eta_adv)
+
     def _eval_cost(self, cond):
         """
         Evaluate costs for all samples for a condition.
Original file line number	Diff line number	Diff line change
`@@ -152,6 +152,7 @@`
`152`	`152`	`'type': PolicyOptCaffe,`
`153`	`153`	`'iterations': 4000,`
`154`	`154`	`'weights_file_prefix': EXP_DIR + 'policy',`
	`155`	`+ 'robust_weights_file_prefix': EXP_DIR + 'robust_policy',`
`155`	`156`	`}`
`156`	`157`
`157`	`158`	`algorithm['policy_prior'] = {`