Adding basic code to run fast NL acados MPSF

Federico-PizarroBejarano · Federico-PizarroBejarano · commit 57fe4870cb22 · 2023-10-18T17:16:45.000-04:00
diff --git a/.gitignore b/.gitignore
@@ -13,6 +13,8 @@ results/
 z_docstring.py
 TODOs.md
 
+**/c_generated_code/**
+**/acados_**
 
 
 # macOS users
diff --git a/experiments/mpsc/config_overrides/cartpole/nl_mpsc_cartpole.yaml b/experiments/mpsc/config_overrides/cartpole/nl_mpsc_cartpole.yaml
@@ -10,6 +10,7 @@ sf_config:
     - 0.5
 
   # MPC Parameters
+  use_acados: False
   horizon: 20
   warmstart: True
   integration_algo: rk4
diff --git a/experiments/mpsc/config_overrides/quadrotor_2D/nl_mpsc_quadrotor_2D.yaml b/experiments/mpsc/config_overrides/quadrotor_2D/nl_mpsc_quadrotor_2D.yaml
@@ -12,6 +12,7 @@ sf_config:
     - 0.5
 
   # MPC Parameters
+  use_acados: False
   horizon: 20
   warmstart: True
   integration_algo: rk4
diff --git a/experiments/mpsc/config_overrides/quadrotor_3D/nl_mpsc_quadrotor_3D.yaml b/experiments/mpsc/config_overrides/quadrotor_3D/nl_mpsc_quadrotor_3D.yaml
@@ -18,10 +18,11 @@ sf_config:
     - 1
 
   # MPC Parameters
+  use_acados: True
   horizon: 20
   warmstart: True
   integration_algo: rk4
-  use_terminal_set: True
+  use_terminal_set: False
 
   # Prior info
   prior_info:
diff --git a/experiments/mpsc/mpsc_experiment.sh b/experiments/mpsc/mpsc_experiment.sh
@@ -26,11 +26,11 @@ fi
 # SAFETY_FILTER='linear_mpsc'
 SAFETY_FILTER='nl_mpsc'
 
-# MPSC_COST='one_step_cost'
+MPSC_COST='one_step_cost'
 # MPSC_COST='constant_cost'
 # MPSC_COST='regularized_cost'
 # MPSC_COST='lqr_cost'
-MPSC_COST='precomputed_cost'
+# MPSC_COST='precomputed_cost'
 # MPSC_COST='learned_cost'
 
 MPSC_COST_HORIZON=2
diff --git a/experiments/mpsc/train_model.sbatch b/experiments/mpsc/train_model.sbatch
@@ -104,7 +104,7 @@ python3 train_rl.py \
     --overrides \
         ./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \
         ./config_overrides/${SYS}/${SYS}_${TASK}.yaml \
-        ./config_overrides/${SYS}/${SAFETY_FILTER}_${SYS}_linear.yaml \
+        ./config_overrides/${SYS}/${SAFETY_FILTER}_${SYS}.yaml \
     --output_dir ./models/rl_models/${SYS}/${TASK}/${ALGO}/${TAG}/ \
     --seed 2 \
     --kv_overrides \
diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py
@@ -304,11 +304,11 @@ def train_step(self):
                 certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
                 if success and self.filter_train_actions is True:
                     action = self.env.envs[0].normalize_action(certified_action)
-                elif not success:
-                    self.safety_filter.setup_optimizer()
-                    certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
-                    if success and self.filter_train_actions is True:
-                        action = self.env.envs[0].normalize_action(certified_action)
+                # elif not success:
+                #     self.safety_filter.setup_optimizer()
+                #     certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
+                #     if success and self.filter_train_actions is True:
+                #         action = self.env.envs[0].normalize_action(certified_action)
 
             action = np.atleast_2d(np.squeeze([action]))
             next_obs, rew, done, info = self.env.step(action)
@@ -435,15 +435,15 @@ def env_reset(self, env):
 
         if self.use_safe_reset is True and self.safety_filter is not None:
 
-            while success is not True or np.any(self.safety_filter.slack_prev > 10e-6):
+            while success is not True:  # or np.any(self.safety_filter.slack_prev > 10e-6):
                 obs, info = env.reset()
                 info['current_step'] = 1
                 physical_action = self.env.envs[0].denormalize_action(act)
                 unextended_obs = np.squeeze(obs)[:self.env.envs[0].symbolic.nx]
                 self.safety_filter.reset_before_run()
                 _, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
-                if not success:
-                    self.safety_filter.setup_optimizer()
-                    _, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
+                # if not success:
+                #     self.safety_filter.setup_optimizer()
+                #     _, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
 
         return obs, info
diff --git a/safe_control_gym/safety_filters/mpsc/mpsc.py b/safe_control_gym/safety_filters/mpsc/mpsc.py
@@ -40,6 +40,7 @@ def __init__(self,
                  cost_function: Cost_Function = Cost_Function.ONE_STEP_COST,
                  mpsc_cost_horizon: int = 5,
                  decay_factor: float = 0.85,
+                 use_acados: bool = False,
                  **kwargs
                  ):
         '''Initialize the MPSC.
@@ -97,6 +98,7 @@ def __init__(self,
 
         if cost_function == Cost_Function.ONE_STEP_COST:
             self.cost_function = ONE_STEP_COST()
+            self.mpsc_cost_horizon = 1
             self.cost_function.mpsc_cost_horizon = 1
         elif cost_function == Cost_Function.CONSTANT_COST:
             self.cost_function = CONSTANT_COST(self.env, mpsc_cost_horizon, decay_factor)
@@ -116,9 +118,21 @@ def set_dynamics(self):
         '''Compute the dynamics.'''
         raise NotImplementedError
 
-    @abstractmethod
     def setup_optimizer(self):
         '''Setup the certifying MPC problem.'''
+        if self.use_acados:
+            self.setup_acados_optimizer()
+        else:
+            self.setup_casadi_optimizer()
+
+    @abstractmethod
+    def setup_casadi_optimizer(self):
+        '''Setup the certifying MPC problem using CasADi (called by setup_optimizer when use_acados is falsy).'''
+        raise NotImplementedError
+
+    @abstractmethod
+    def setup_acados_optimizer(self):
+        '''Setup the certifying MPC problem using ACADOS (called by setup_optimizer when use_acados is truthy).'''
         raise NotImplementedError
 
     def before_optimization(self, obs):
@@ -146,6 +160,28 @@ def solve_optimization(self,
             feasible (bool): Whether the safety filtering was feasible or not.
         '''
 
+        if self.use_acados:
+            action, feasible = self.solve_acados_optimization(obs, uncertified_action, iteration)
+        else:
+            action, feasible = self.solve_casadi_optimization(obs, uncertified_action, iteration)
+        return action, feasible
+
+    def solve_casadi_optimization(self,
+                                  obs,
+                                  uncertified_action,
+                                  iteration=None,
+                                  ):
+        '''Solve the MPC optimization problem for a given observation and uncertified input.
+
+        Args:
+            obs (ndarray): Current state/observation.
+            uncertified_action (ndarray): The uncertified_controller's action.
+            iteration (int): The current iteration, used for trajectory tracking.
+
+        Returns:
+            action (ndarray): The certified action.
+            feasible (bool): Whether the safety filtering was feasible or not.
+        '''
         opti_dict = self.opti_dict
         opti = opti_dict['opti']
         z_var = opti_dict['z_var']
@@ -193,6 +229,54 @@ def solve_optimization(self,
             action = None
         return action, feasible
 
+    def solve_acados_optimization(self,
+                                  obs,
+                                  uncertified_action,
+                                  iteration=None,
+                                  ):
+        '''Solve the MPC optimization problem with acados for a given observation and uncertified input.
+
+        Args:
+            obs (ndarray): Current state/observation, used as the initial state of the OCP.
+            uncertified_action (ndarray): The uncertified_controller's action.
+            iteration (int): The current iteration, used for trajectory tracking.
+
+        Returns:
+            action (ndarray): The certified action, or None if the solve raised an exception.
+            feasible (bool): Whether the safety filtering was feasible or not.
+        '''
+        ocp_solver = self.ocp_solver
+        # np.ravel keeps a single-input action 1-D; np.squeeze would make it 0-d, which np.concatenate rejects.
+        ocp_solver.cost_set(0, 'yref', np.concatenate((np.zeros((self.model.nx)), np.ravel(uncertified_action))))
+
+        if isinstance(self.cost_function, PRECOMPUTED_COST):
+            uncert_input_traj = self.cost_function.calculate_unsafe_path(obs, uncertified_action, iteration)
+
+            for stage in range(1, self.mpsc_cost_horizon):
+                ocp_solver.cost_set(stage, 'yref', np.concatenate((np.zeros((self.model.nx)), uncert_input_traj[:, stage])))
+
+        # Solve the optimization problem.
+        try:
+            action = ocp_solver.solve_for_x0(x0_bar=obs)
+            self.cost_prev = ocp_solver.get_cost()
+            x_val = np.zeros((self.horizon + 1, self.model.nx))
+            u_val = np.zeros((self.horizon, self.model.nu))
+            for i in range(self.horizon):
+                x_val[i, :] = ocp_solver.get(i, 'x')
+                u_val[i, :] = ocp_solver.get(i, 'u')
+            x_val[self.horizon, :] = ocp_solver.get(self.horizon, 'x')
+            self.z_prev = x_val.T
+            self.v_prev = u_val.T
+            # The action returned by solve_for_x0 is stored as the previously applied action.
+            self.prev_action = action
+            feasible = True
+        except Exception as e:
+            print('Error Return Status:', ocp_solver.status)
+            print(e)
+            feasible = False
+            action = None
+        return action, feasible
+
     def certify_action(self,
                        current_state,
                        uncertified_action,
diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py
@@ -10,7 +10,7 @@ class MPSC_COST(ABC):
 
     def __init__(self,
                  env: BenchmarkEnv = None,
-                 mpsc_cost_horizon: int = 5,
+                 mpsc_cost_horizon: int = 1,
                  decay_factor: float = 0.85,
                  ):
         '''Initialize the MPSC Cost.
diff --git a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py
@@ -16,8 +16,10 @@
 import casadi as cs
 import cvxpy as cp
 import numpy as np
+from acados_template import AcadosOcp, AcadosOcpSolver
+from acados_template.acados_model import AcadosModel
 from pytope import Polytope
-from scipy.linalg import solve_discrete_are, sqrtm
+from scipy.linalg import block_diag, solve_discrete_are, sqrtm
 
 from safe_control_gym.controllers.mpc.mpc_utils import discretize_linear_system, rk_discrete
 from safe_control_gym.envs.benchmark_env import Environment, Task
@@ -821,7 +823,7 @@ def save(self, path):
         with open(path, 'wb') as f:
             pickle.dump(parameters, f)
 
-    def setup_optimizer(self):
+    def setup_casadi_optimizer(self):
         '''Setup the certifying MPC problem.'''
 
         # Horizon parameter.
@@ -935,3 +937,127 @@ def setup_optimizer(self):
             cost = cost + self.slack_cost * slack_term
         opti.minimize(cost)
         self.opti_dict['cost'] = cost
+
+    def setup_acados_optimizer(self):
+        '''Setup the certifying MPC problem using acados and store the solver in self.ocp_solver.'''
+        # Create the OCP object used to formulate the problem.
+        ocp = AcadosOcp()
+
+        # Setup model
+        model = AcadosModel()
+        model.x = self.model.x_sym
+        model.u = self.model.u_sym
+        model.f_expl_expr = self.model.x_dot
+
+        if self.env.NAME == Environment.CARTPOLE:
+            x1_dot = cs.MX.sym('x1_dot')
+            v_dot = cs.MX.sym('v_dot')
+            theta1_dot = cs.MX.sym('theta1_dot')
+            dtheta_dot = cs.MX.sym('dtheta_dot')
+            xdot = cs.vertcat(x1_dot, v_dot, theta1_dot, dtheta_dot)
+        elif self.env.NAME == Environment.QUADROTOR and self.env.QUAD_TYPE == 2:
+            x1_dot = cs.MX.sym('x1_dot')
+            vx_dot = cs.MX.sym('vx_dot')
+            z1_dot = cs.MX.sym('z1_dot')
+            vz_dot = cs.MX.sym('vz_dot')
+            theta1_dot = cs.MX.sym('theta1_dot')
+            dtheta_dot = cs.MX.sym('dtheta_dot')
+            xdot = cs.vertcat(x1_dot, vx_dot, z1_dot, vz_dot, theta1_dot, dtheta_dot)
+        else:
+            x1_dot = cs.MX.sym('x1_dot')
+            vx_dot = cs.MX.sym('vx_dot')
+            y1_dot = cs.MX.sym('y1_dot')
+            vy_dot = cs.MX.sym('vy_dot')
+            z1_dot = cs.MX.sym('z1_dot')
+            vz_dot = cs.MX.sym('vz_dot')
+            phi1_dot = cs.MX.sym('phi1_dot')  # Roll
+            theta1_dot = cs.MX.sym('theta1_dot')  # Pitch
+            psi1_dot = cs.MX.sym('psi1_dot')  # Yaw
+            p1_body_dot = cs.MX.sym('p1_body_dot')  # Body frame roll rate
+            q1_body_dot = cs.MX.sym('q1_body_dot')  # Body frame pitch rate
+            r1_body_dot = cs.MX.sym('r1_body_dot')  # Body frame yaw rate
+            xdot = cs.vertcat(x1_dot, vx_dot, y1_dot, vy_dot, z1_dot, vz_dot, phi1_dot, theta1_dot, psi1_dot, p1_body_dot, q1_body_dot, r1_body_dot)
+
+        model.xdot = xdot
+        model.f_impl_expr = model.xdot - model.f_expl_expr
+        model.name = 'mpsf'
+        ocp.model = model
+
+        nx, nu = self.model.nx, self.model.nu
+        ny = nx + nu
+
+        ocp.dims.N = self.horizon
+
+        # set cost module
+        ocp.cost.cost_type = 'LINEAR_LS'
+        ocp.cost.cost_type_e = 'LINEAR_LS'
+
+        Q_mat = np.zeros((nx, nx))  # No stage cost on the state; only the input block of W is non-zero.
+        # NOTE(review): W_e is still hard-coded for a 12-dimensional state (3D quadrotor) - confirm weights for other systems.
+        ocp.cost.W_e = np.diag([0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1]) * 0.0001
+        R_mat = np.eye(nu)
+        ocp.cost.W = block_diag(Q_mat, R_mat)
+
+        ocp.cost.Vx = np.zeros((ny, nx))
+        ocp.cost.Vx[:nx, :] = np.eye(nx)
+        ocp.cost.Vu = np.zeros((ny, nu))
+        ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu)
+        ocp.cost.Vx_e = np.eye(nx)
+
+        ocp.model.cost_y_expr = cs.vertcat(model.x, model.u)
+        ocp.model.cost_y_expr_e = model.x
+
+        # Updated on each iteration
+        ocp.cost.yref = np.concatenate((self.model.X_EQ, self.model.U_EQ))
+        ocp.cost.yref_e = self.model.X_EQ
+
+        # set constraints
+        ocp.constraints.constr_type = 'BGH'
+        ocp.constraints.constr_type_e = 'BGH'
+
+        ocp.constraints.x0 = self.model.X_EQ
+        ocp.constraints.C = self.L_x
+        ocp.constraints.D = self.L_u
+        ocp.constraints.lg = -1000 * np.ones((self.p))
+        ocp.constraints.ug = np.zeros((self.p))
+
+        # Slack
+        ocp.constraints.Jsg = np.eye(self.p)
+        ocp.cost.Zu = 0.1 * np.array([self.slack_cost] * self.p) / self.p
+        ocp.cost.Zl = 0.1 * np.array([self.slack_cost] * self.p) / self.p
+        ocp.cost.zu = 0.1 * np.array([self.slack_cost] * self.p) / self.p
+        ocp.cost.zl = 0.1 * np.array([self.slack_cost] * self.p) / self.p
+
+        # Options
+        ocp.solver_options.qp_solver = 'FULL_CONDENSING_HPIPM'
+        ocp.solver_options.hessian_approx = 'GAUSS_NEWTON'
+        ocp.solver_options.hpipm_mode = 'BALANCE'
+        ocp.solver_options.integrator_type = 'ERK'
+        ocp.solver_options.nlp_solver_type = 'SQP_RTI'
+        ocp.solver_options.nlp_solver_max_iter = 10
+
+        # set prediction horizon
+        ocp.solver_options.tf = self.dt * self.horizon
+
+        solver_json = f'acados_ocp_{model.name}.json'
+        ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json)
+
+        for stage in range(self.mpsc_cost_horizon):
+            ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W)
+
+        for stage in range(self.mpsc_cost_horizon, self.horizon):
+            ocp_solver.cost_set(stage, 'W', 0.1 * ocp.cost.W)
+
+        s_var = np.zeros((self.horizon + 1))
+        g = np.zeros((self.horizon, self.p))
+
+        # Tighten the upper bound 'ug' stage by stage, with s_var[i + 1] = rho * s_var[i] + max_w.
+        for i in range(self.horizon):
+            s_var[i + 1] = self.rho * s_var[i] + self.max_w
+            for j in range(self.p):
+                tighten_by = self.c_js[j] * s_var[i + 1]
+                g[i, j] = (self.l_xu[j] - tighten_by)
+            g[i, :] += (self.L_x @ self.X_mid) + (self.L_u @ self.U_mid)
+            ocp_solver.constraints_set(i, 'ug', g[i, :])
+
+        self.ocp_solver = ocp_solver