Skip to content

Commit 57fe487

Browse files
Adding basic code to run fast NL acados MPSF
1 parent da1f5a2 commit 57fe487

File tree

10 files changed

+232
-17
lines changed

10 files changed

+232
-17
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ results/
1313
z_docstring.py
1414
TODOs.md
1515

16+
**/c_generated_code/**
17+
**/acados_**
1618

1719

1820
# macOS users

experiments/mpsc/config_overrides/cartpole/nl_mpsc_cartpole.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ sf_config:
1010
- 0.5
1111

1212
# MPC Parameters
13+
use_acados: False
1314
horizon: 20
1415
warmstart: True
1516
integration_algo: rk4

experiments/mpsc/config_overrides/quadrotor_2D/nl_mpsc_quadrotor_2D.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ sf_config:
1212
- 0.5
1313

1414
# MPC Parameters
15+
use_acados: False
1516
horizon: 20
1617
warmstart: True
1718
integration_algo: rk4

experiments/mpsc/config_overrides/quadrotor_3D/nl_mpsc_quadrotor_3D.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@ sf_config:
1818
- 1
1919

2020
# MPC Parameters
21+
use_acados: True
2122
horizon: 20
2223
warmstart: True
2324
integration_algo: rk4
24-
use_terminal_set: True
25+
use_terminal_set: False
2526

2627
# Prior info
2728
prior_info:

experiments/mpsc/mpsc_experiment.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ fi
2626
# SAFETY_FILTER='linear_mpsc'
2727
SAFETY_FILTER='nl_mpsc'
2828

29-
# MPSC_COST='one_step_cost'
29+
MPSC_COST='one_step_cost'
3030
# MPSC_COST='constant_cost'
3131
# MPSC_COST='regularized_cost'
3232
# MPSC_COST='lqr_cost'
33-
MPSC_COST='precomputed_cost'
33+
# MPSC_COST='precomputed_cost'
3434
# MPSC_COST='learned_cost'
3535

3636
MPSC_COST_HORIZON=2

experiments/mpsc/train_model.sbatch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ python3 train_rl.py \
104104
--overrides \
105105
./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \
106106
./config_overrides/${SYS}/${SYS}_${TASK}.yaml \
107-
./config_overrides/${SYS}/${SAFETY_FILTER}_${SYS}_linear.yaml \
107+
./config_overrides/${SYS}/${SAFETY_FILTER}_${SYS}.yaml \
108108
--output_dir ./models/rl_models/${SYS}/${TASK}/${ALGO}/${TAG}/ \
109109
--seed 2 \
110110
--kv_overrides \

safe_control_gym/controllers/ppo/ppo.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -304,11 +304,11 @@ def train_step(self):
304304
certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
305305
if success and self.filter_train_actions is True:
306306
action = self.env.envs[0].normalize_action(certified_action)
307-
elif not success:
308-
self.safety_filter.setup_optimizer()
309-
certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
310-
if success and self.filter_train_actions is True:
311-
action = self.env.envs[0].normalize_action(certified_action)
307+
# elif not success:
308+
# self.safety_filter.setup_optimizer()
309+
# certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
310+
# if success and self.filter_train_actions is True:
311+
# action = self.env.envs[0].normalize_action(certified_action)
312312

313313
action = np.atleast_2d(np.squeeze([action]))
314314
next_obs, rew, done, info = self.env.step(action)
@@ -435,15 +435,15 @@ def env_reset(self, env):
435435

436436
if self.use_safe_reset is True and self.safety_filter is not None:
437437

438-
while success is not True or np.any(self.safety_filter.slack_prev > 10e-6):
438+
while success is not True: # or np.any(self.safety_filter.slack_prev > 10e-6):
439439
obs, info = env.reset()
440440
info['current_step'] = 1
441441
physical_action = self.env.envs[0].denormalize_action(act)
442442
unextended_obs = np.squeeze(obs)[:self.env.envs[0].symbolic.nx]
443443
self.safety_filter.reset_before_run()
444444
_, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
445-
if not success:
446-
self.safety_filter.setup_optimizer()
447-
_, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
445+
# if not success:
446+
# self.safety_filter.setup_optimizer()
447+
# _, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
448448

449449
return obs, info

safe_control_gym/safety_filters/mpsc/mpsc.py

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(self,
4040
cost_function: Cost_Function = Cost_Function.ONE_STEP_COST,
4141
mpsc_cost_horizon: int = 5,
4242
decay_factor: float = 0.85,
43+
use_acados: bool = False,
4344
**kwargs
4445
):
4546
'''Initialize the MPSC.
@@ -97,6 +98,7 @@ def __init__(self,
9798

9899
if cost_function == Cost_Function.ONE_STEP_COST:
99100
self.cost_function = ONE_STEP_COST()
101+
self.mpsc_cost_horizon = 1
100102
self.cost_function.mpsc_cost_horizon = 1
101103
elif cost_function == Cost_Function.CONSTANT_COST:
102104
self.cost_function = CONSTANT_COST(self.env, mpsc_cost_horizon, decay_factor)
@@ -116,9 +118,21 @@ def set_dynamics(self):
116118
'''Compute the dynamics.'''
117119
raise NotImplementedError
118120

119-
@abstractmethod
120121
def setup_optimizer(self):
121122
'''Setup the certifying MPC problem.'''
123+
if self.use_acados:
124+
self.setup_acados_optimizer()
125+
else:
126+
self.setup_casadi_optimizer()
127+
128+
@abstractmethod
129+
def setup_casadi_optimizer(self):
130+
'''Setup the certifying MPC problem using CasADi.'''
131+
raise NotImplementedError
132+
133+
@abstractmethod
134+
def setup_acados_optimizer(self):
135+
'''Setup the certifying MPC problem using ACADOS.'''
122136
raise NotImplementedError
123137

124138
def before_optimization(self, obs):
@@ -146,6 +160,28 @@ def solve_optimization(self,
146160
feasible (bool): Whether the safety filtering was feasible or not.
147161
'''
148162

163+
if self.use_acados:
164+
action, feasible = self.solve_acados_optimization(obs, uncertified_action, iteration)
165+
else:
166+
action, feasible = self.solve_casadi_optimization(obs, uncertified_action, iteration)
167+
return action, feasible
168+
169+
def solve_casadi_optimization(self,
170+
obs,
171+
uncertified_action,
172+
iteration=None,
173+
):
174+
'''Solve the MPC optimization problem for a given observation and uncertified input.
175+
176+
Args:
177+
obs (ndarray): Current state/observation.
178+
uncertified_action (ndarray): The uncertified_controller's action.
179+
iteration (int): The current iteration, used for trajectory tracking.
180+
181+
Returns:
182+
action (ndarray): The certified action.
183+
feasible (bool): Whether the safety filtering was feasible or not.
184+
'''
149185
opti_dict = self.opti_dict
150186
opti = opti_dict['opti']
151187
z_var = opti_dict['z_var']
@@ -193,6 +229,54 @@ def solve_optimization(self,
193229
action = None
194230
return action, feasible
195231

232+
def solve_acados_optimization(self,
233+
obs,
234+
uncertified_action,
235+
iteration=None,
236+
):
237+
'''Solve the MPC optimization problem for a given observation and uncertified input.
238+
239+
Args:
240+
obs (ndarray): Current state/observation.
241+
uncertified_action (ndarray): The uncertified_controller's action.
242+
iteration (int): The current iteration, used for trajectory tracking.
243+
244+
Returns:
245+
action (ndarray): The certified action.
246+
feasible (bool): Whether the safety filtering was feasible or not.
247+
'''
248+
249+
ocp_solver = self.ocp_solver
250+
ocp_solver.cost_set(0, 'yref', np.concatenate((np.zeros((self.model.nx)), np.squeeze(uncertified_action))))
251+
252+
if isinstance(self.cost_function, PRECOMPUTED_COST):
253+
uncert_input_traj = self.cost_function.calculate_unsafe_path(obs, uncertified_action, iteration)
254+
255+
for stage in range(1, self.mpsc_cost_horizon):
256+
ocp_solver.cost_set(stage, 'yref', np.concatenate((np.zeros((self.model.nx)), uncert_input_traj[:, stage])))
257+
258+
# Solve the optimization problem.
259+
try:
260+
action = ocp_solver.solve_for_x0(x0_bar=obs)
261+
self.cost_prev = ocp_solver.get_cost()
262+
x_val = np.zeros((self.horizon + 1, self.model.nx))
263+
u_val = np.zeros((self.horizon, self.model.nu))
264+
for i in range(self.horizon):
265+
x_val[i, :] = ocp_solver.get(i, 'x')
266+
u_val[i, :] = ocp_solver.get(i, 'u')
267+
x_val[self.horizon, :] = ocp_solver.get(self.horizon, 'x')
268+
self.z_prev = x_val.T
269+
self.v_prev = u_val.T
270+
# Take the first one from solved action sequence.
271+
self.prev_action = action
272+
feasible = True
273+
except Exception as e:
274+
print('Error Return Status:', ocp_solver.status)
275+
print(e)
276+
feasible = False
277+
action = None
278+
return action, feasible
279+
196280
def certify_action(self,
197281
current_state,
198282
uncertified_action,

safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ class MPSC_COST(ABC):
1010

1111
def __init__(self,
1212
env: BenchmarkEnv = None,
13-
mpsc_cost_horizon: int = 5,
13+
mpsc_cost_horizon: int = 1,
1414
decay_factor: float = 0.85,
1515
):
1616
'''Initialize the MPSC Cost.

safe_control_gym/safety_filters/mpsc/nl_mpsc.py

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
import casadi as cs
1717
import cvxpy as cp
1818
import numpy as np
19+
from acados_template import AcadosOcp, AcadosOcpSolver
20+
from acados_template.acados_model import AcadosModel
1921
from pytope import Polytope
20-
from scipy.linalg import solve_discrete_are, sqrtm
22+
from scipy.linalg import block_diag, solve_discrete_are, sqrtm
2123

2224
from safe_control_gym.controllers.mpc.mpc_utils import discretize_linear_system, rk_discrete
2325
from safe_control_gym.envs.benchmark_env import Environment, Task
@@ -821,7 +823,7 @@ def save(self, path):
821823
with open(path, 'wb') as f:
822824
pickle.dump(parameters, f)
823825

824-
def setup_optimizer(self):
826+
def setup_casadi_optimizer(self):
825827
'''Setup the certifying MPC problem.'''
826828

827829
# Horizon parameter.
@@ -935,3 +937,127 @@ def setup_optimizer(self):
935937
cost = cost + self.slack_cost * slack_term
936938
opti.minimize(cost)
937939
self.opti_dict['cost'] = cost
940+
941+
def setup_acados_optimizer(self):
942+
'''setup_optimizer_acados'''
943+
# create ocp object to formulate the OCP
944+
ocp = AcadosOcp()
945+
946+
# Setup model
947+
model = AcadosModel()
948+
model.x = self.model.x_sym
949+
model.u = self.model.u_sym
950+
model.f_expl_expr = self.model.x_dot
951+
952+
if self.env.NAME == Environment.CARTPOLE:
953+
x1_dot = cs.MX.sym('x1_dot')
954+
v_dot = cs.MX.sym('v_dot')
955+
theta1_dot = cs.MX.sym('theta1_dot')
956+
dtheta_dot = cs.MX.sym('dtheta_dot')
957+
xdot = cs.vertcat(x1_dot, v_dot, theta1_dot, dtheta_dot)
958+
elif self.env.NAME == Environment.QUADROTOR and self.env.QUAD_TYPE == 2:
959+
x1_dot = cs.MX.sym('x1_dot')
960+
vx_dot = cs.MX.sym('vx_dot')
961+
z1_dot = cs.MX.sym('z1_dot')
962+
vz_dot = cs.MX.sym('vz_dot')
963+
theta1_dot = cs.MX.sym('theta1_dot')
964+
dtheta_dot = cs.MX.sym('dtheta_dot')
965+
xdot = cs.vertcat(x1_dot, vx_dot, z1_dot, vz_dot, theta1_dot, dtheta_dot)
966+
else:
967+
x1_dot = cs.MX.sym('x1_dot')
968+
vx_dot = cs.MX.sym('vx_dot')
969+
y1_dot = cs.MX.sym('y1_dot')
970+
vy_dot = cs.MX.sym('vy_dot')
971+
z1_dot = cs.MX.sym('z1_dot')
972+
vz_dot = cs.MX.sym('vz_dot')
973+
phi1_dot = cs.MX.sym('phi1_dot') # Roll
974+
theta1_dot = cs.MX.sym('theta1_dot') # Pitch
975+
psi1_dot = cs.MX.sym('psi1_dot') # Yaw
976+
p1_body_dot = cs.MX.sym('p1_body_dot') # Body frame roll rate
977+
q1_body_dot = cs.MX.sym('q1_body_dot') # body frame pith rate
978+
r1_body_dot = cs.MX.sym('r1_body_dot') # body frame yaw rate
979+
xdot = cs.vertcat(x1_dot, vx_dot, y1_dot, vy_dot, z1_dot, vz_dot, phi1_dot, theta1_dot, psi1_dot, p1_body_dot, q1_body_dot, r1_body_dot)
980+
981+
model.xdot = xdot
982+
model.f_impl_expr = model.xdot - model.f_expl_expr
983+
model.name = 'mpsf'
984+
ocp.model = model
985+
986+
nx, nu = self.model.nx, self.model.nu
987+
ny = nx + nu
988+
989+
ocp.dims.N = self.horizon
990+
991+
# set cost module
992+
ocp.cost.cost_type = 'LINEAR_LS'
993+
ocp.cost.cost_type_e = 'LINEAR_LS'
994+
995+
Q_mat = np.diag([0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1]) * 0 # .0001
996+
ocp.cost.W_e = np.diag([0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1]) * 0.0001
997+
R_mat = np.eye(nu)
998+
ocp.cost.W = block_diag(Q_mat, R_mat)
999+
1000+
ocp.cost.Vx = np.zeros((ny, nx))
1001+
ocp.cost.Vx[:nx, :] = np.eye(nx)
1002+
ocp.cost.Vu = np.zeros((ny, nu))
1003+
ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu)
1004+
ocp.cost.Vx_e = np.eye(nx)
1005+
1006+
ocp.model.cost_y_expr = cs.vertcat(model.x, model.u)
1007+
ocp.model.cost_y_expr_e = model.x
1008+
1009+
# Updated on each iteration
1010+
ocp.cost.yref = np.concatenate((self.model.X_EQ, self.model.U_EQ))
1011+
ocp.cost.yref_e = self.model.X_EQ
1012+
1013+
# set constraints
1014+
ocp.constraints.constr_type = 'BGH'
1015+
ocp.constraints.constr_type_e = 'BGH'
1016+
1017+
ocp.constraints.x0 = self.model.X_EQ
1018+
ocp.constraints.C = self.L_x
1019+
ocp.constraints.D = self.L_u
1020+
ocp.constraints.lg = -1000 * np.ones((self.p))
1021+
ocp.constraints.ug = np.zeros((self.p))
1022+
1023+
# Slack
1024+
ocp.constraints.Jsg = np.eye(self.p)
1025+
ocp.cost.Zu = 0.1 * np.array([self.slack_cost] * self.p) / self.p
1026+
ocp.cost.Zl = 0.1 * np.array([self.slack_cost] * self.p) / self.p
1027+
ocp.cost.zu = 0.1 * np.array([self.slack_cost] * self.p) / self.p
1028+
ocp.cost.zl = 0.1 * np.array([self.slack_cost] * self.p) / self.p
1029+
1030+
# Options
1031+
ocp.solver_options.qp_solver = 'FULL_CONDENSING_HPIPM'
1032+
ocp.solver_options.hessian_approx = 'GAUSS_NEWTON'
1033+
ocp.solver_options.hpipm_mode = 'BALANCE'
1034+
ocp.solver_options.integrator_type = 'ERK'
1035+
# ocp.solver_options.sim_method_newton_iter = 3
1036+
ocp.solver_options.nlp_solver_type = 'SQP_RTI'
1037+
ocp.solver_options.nlp_solver_max_iter = 10
1038+
# ocp.solver_options.nlp_solver_step_length = 1.0
1039+
1040+
# set prediction horizon
1041+
ocp.solver_options.tf = self.dt * self.horizon
1042+
1043+
solver_json = 'acados_ocp_cartpole.json'
1044+
ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json)
1045+
1046+
for stage in range(self.mpsc_cost_horizon):
1047+
ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W)
1048+
1049+
for stage in range(self.mpsc_cost_horizon, self.horizon):
1050+
ocp_solver.cost_set(stage, 'W', 0.1 * ocp.cost.W)
1051+
1052+
s_var = np.zeros((self.horizon + 1))
1053+
g = np.zeros((self.horizon, self.p))
1054+
1055+
for i in range(self.horizon):
1056+
s_var[i + 1] = self.rho * s_var[i] + self.max_w
1057+
for j in range(self.p):
1058+
tighten_by = self.c_js[j] * s_var[i + 1]
1059+
g[i, j] = (self.l_xu[j] - tighten_by)
1060+
g[i, :] += (self.L_x @ self.X_mid) + (self.L_u @ self.U_mid)
1061+
ocp_solver.constraints_set(i, 'ug', g[i, :])
1062+
1063+
self.ocp_solver = ocp_solver

0 commit comments

Comments
 (0)