diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml index 099581cf8..383d80a3b 100644 --- a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml +++ b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml @@ -1,8 +1,8 @@ algo: ppo algo_config: # model args - hidden_dim: 128 - activation: tanh + hidden_dim: 64 + activation: relu # loss args gamma: 0.98 @@ -19,7 +19,7 @@ algo_config: critic_lr: 0.001 # runner args - max_env_steps: 2640000 + max_env_steps: 660000 rollout_batch_size: 1 rollout_steps: 660 eval_batch_size: 10 diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml index ed0dc4227..168bffbb6 100644 --- a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml +++ b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml @@ -34,18 +34,10 @@ task_config: # RL Reward rew_state_weight: [10, 0.1, 10, 0.1, 0.1, 0.001] rew_act_weight: [0.1, 0.1] + info_mse_metric_state_weight: [1, 0, 1, 0, 0, 0] rew_exponential: True - # disturbances: - # observation: - # - disturbance_func: white_noise - # std: [5.6e-05, 1.5e-04, 2.9e-05, 8.0e-04, 1.3e-04, 3.6e-04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - constraints: - - constraint_form: default_constraint - constrained_variable: state - upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3] - lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3] - constraint_form: default_constraint constrained_variable: input diff --git a/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml index c9a8f8f93..638ab0f7f 100644 --- a/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml +++ b/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml @@ -1,8 +1,8 @@ algo: ppo algo_config: # model args - hidden_dim: 128 - activation: tanh + hidden_dim: 64 + activation: relu # loss args gamma: 0.98 @@ -19,7 +19,7 @@ algo_config: critic_lr: 0.001 # runner args - max_env_steps: 2640000 + max_env_steps: 660000 rollout_batch_size: 1 rollout_steps: 660 eval_batch_size: 10 diff --git a/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml b/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml index ed0dc4227..2fe979028 100644 --- a/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml +++ b/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml @@ -34,13 +34,9 @@ task_config: # RL Reward rew_state_weight: [10, 0.1, 10, 0.1, 0.1, 0.001] rew_act_weight: [0.1, 0.1] + info_mse_metric_state_weight: [1, 0, 1, 0, 0, 0] rew_exponential: True - # disturbances: - # observation: - # - disturbance_func: white_noise - # std: [5.6e-05, 1.5e-04, 2.9e-05, 8.0e-04, 1.3e-04, 3.6e-04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - constraints: - constraint_form: default_constraint constrained_variable: state diff --git a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py index 44ff11655..36b2f012c 100644 --- a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py @@ -990,26 +990,20 @@ def setup_acados_optimizer(self): # set cost module ocp.cost.cost_type = 'LINEAR_LS' - ocp.cost.cost_type_e = 'LINEAR_LS' Q_mat = np.zeros((nx, nx)) - ocp.cost.W_e = np.zeros((nx, nx)) R_mat = np.eye(nu) ocp.cost.W = block_diag(Q_mat, R_mat) ocp.cost.Vx = np.zeros((ny, nx)) ocp.cost.Vu = np.zeros((ny, nu)) ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu) - ocp.cost.Vx_e = np.eye(nx) # Updated on each iteration ocp.cost.yref = np.concatenate((self.model.X_EQ, self.model.U_EQ)) - ocp.cost.yref_e = self.model.X_EQ # set constraints ocp.constraints.constr_type = 'BGH' - ocp.constraints.constr_type_e = 'BGH' - ocp.constraints.x0 = self.model.X_EQ ocp.constraints.C = self.L_x ocp.constraints.D = self.L_u