|
| 1 | +import sys |
| 2 | +sys.path.insert(0, '/home/federico/GitHub/safe-control-gym') |
| 3 | + |
| 4 | +from functools import partial |
| 5 | + |
| 6 | +import numpy as np |
| 7 | + |
| 8 | +from experiments.crazyflie.crazyflie_utils import gen_traj |
| 9 | +from safe_control_gym.controllers.firmware.firmware_wrapper import Command |
| 10 | +from safe_control_gym.envs.benchmark_env import Task |
| 11 | +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function |
| 12 | +from safe_control_gym.utils.registration import make |
| 13 | + |
| 14 | +save_data = True |
| 15 | +TEST = 0 |
| 16 | +CERTIFY = True |
| 17 | +COST_FUNCTION = 'one_step_cost' |
| 18 | +M = 2 |
| 19 | + |
| 20 | +algo = 'lqr' |
| 21 | +task = 'quadrotor' |
| 22 | +sf = 'nl_mpsc' |
| 23 | + |
| 24 | +algo_config = { |
| 25 | + 'q_lqr': [0.1], |
| 26 | + 'r_lqr': [0.1], |
| 27 | + 'discrete_dynamics': True |
| 28 | +} |
| 29 | +task_config = { |
| 30 | + 'info_in_reset': True, |
| 31 | + 'ctrl_freq': 25, |
| 32 | + 'pyb_freq': 1000, |
| 33 | + 'physics': 'pyb', |
| 34 | + 'gui': False, |
| 35 | + 'quad_type': 3, |
| 36 | + 'normalized_rl_action_space': False, |
| 37 | + 'episode_len_sec': 20, |
| 38 | + 'inertial_prop': |
| 39 | + {'M': 0.03775, |
| 40 | + 'Ixx': 1.4e-05, |
| 41 | + 'Iyy': 1.4e-05, |
| 42 | + 'Izz': 2.17e-05}, |
| 43 | + 'randomized_inertial_prop': False, |
| 44 | + 'task': 'stabilization', |
| 45 | + 'task_info': |
| 46 | + {'stabilization_goal': [0.5, -0.5, 2], |
| 47 | + 'stabilization_goal_tolerance': 0.0}, |
| 48 | + 'cost': 'quadratic', |
| 49 | + 'constraints': |
| 50 | + [ |
| 51 | + {'constraint_form': 'default_constraint', |
| 52 | + 'constrained_variable': 'state', |
| 53 | + 'upper_bounds': [0.75, 0.5, 1, 1, 2, 1, 0.2, 0.2, 0.2, 1, 1, 1], |
| 54 | + 'lower_bounds': [-0.75, -0.5, -1, -1, 0, -1, -0.2, -0.2, -0.2, -1, -1, -1]}, |
| 55 | + {'constraint_form': 'default_constraint', |
| 56 | + 'constrained_variable': 'input', |
| 57 | + } |
| 58 | + ], |
| 59 | + 'done_on_violation': False, |
| 60 | + 'done_on_out_of_bound': True, |
| 61 | + 'seed': 1337, |
| 62 | +} |
| 63 | +sf_config = { |
| 64 | + 'r_lin': [90], |
| 65 | + 'q_lin': [0.001, 0.06, 0.001, 0.06, 0.00025, 80, 1e-05, 1e-05, 0.75, 1, 1, 1], |
| 66 | + 'horizon': 10, |
| 67 | + 'warmstart': True, |
| 68 | + 'integration_algo': 'rk4', |
| 69 | + 'use_terminal_set': True, |
| 70 | + 'cost_function': COST_FUNCTION, |
| 71 | + 'mpsc_cost_horizon': M, |
| 72 | + 'decay_factor': 0.85, |
| 73 | + 'prior_info': { |
| 74 | + 'prior_prop': None, |
| 75 | + 'randomize_prior_prop': False, |
| 76 | + 'prior_prop_rand_info': None}, |
| 77 | + 'n_samples': 600 |
| 78 | +} |
| 79 | + |
| 80 | + |
| 81 | +class Controller(): |
| 82 | + '''Template controller class. ''' |
| 83 | + |
| 84 | + def __init__(self, |
| 85 | + initial_obs, |
| 86 | + initial_info, |
| 87 | + ): |
| 88 | + '''Initialization of the controller. |
| 89 | +
|
| 90 | + Args: |
| 91 | + initial_obs (ndarray): The initial observation of the quadrotor's state |
| 92 | + [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r]. |
| 93 | + initial_info (dict): The a priori information as a dictionary. |
| 94 | + ''' |
| 95 | + # Save environment and conrol parameters. |
| 96 | + self.CTRL_FREQ = initial_info['ctrl_freq'] |
| 97 | + self.CTRL_DT = 1.0 / self.CTRL_FREQ |
| 98 | + self.initial_obs = initial_obs |
| 99 | + |
| 100 | + # Create trajectory. |
| 101 | + self.full_trajectory = gen_traj(self.CTRL_FREQ, 20) |
| 102 | + self.lqr_gain = 0.05 * np.array([[4, 0.1]]) |
| 103 | + |
| 104 | + self.prev_vel = 0 |
| 105 | + self.prev_x = 0 |
| 106 | + self.alpha = 0.3 |
| 107 | + |
| 108 | + # Reset counters and buffers. |
| 109 | + self.reset() |
| 110 | + |
| 111 | + def cmdFirmware(self, |
| 112 | + ep_time, |
| 113 | + obs, |
| 114 | + ): |
| 115 | + '''Pick command sent to the quadrotor through a Crazyswarm/Crazyradio-like interface. |
| 116 | +
|
| 117 | + Args: |
| 118 | + ep_time (float): Episode's elapsed time, in seconds. |
| 119 | + obs (ndarray): The quadrotor's Vicon data [x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0]. |
| 120 | + reward (float, optional): The reward signal. |
| 121 | + done (bool, optional): Wether the episode has terminated. |
| 122 | + info (dict, optional): Current step information as a dictionary with keys |
| 123 | + 'constraint_violation', 'current_target_gate_pos', etc. |
| 124 | +
|
| 125 | + Returns: |
| 126 | + Command: selected type of command (takeOff, cmdFullState, etc., see Enum-like class `Command`). |
| 127 | + List: arguments for the type of command (see comments in class `Command`) |
| 128 | + ''' |
| 129 | + |
| 130 | + iteration = int(ep_time * self.CTRL_FREQ) |
| 131 | + x_eq = 0 # -2.36 |
| 132 | + y_eq = 0 # 0.762 |
| 133 | + z_height = 0.5 # 1 |
| 134 | + |
| 135 | + if iteration > 0: |
| 136 | + est_vel = (obs[0] - self.prev_x) / self.CTRL_DT |
| 137 | + self.prev_vel = (1 - self.alpha) * self.prev_vel + self.alpha * est_vel |
| 138 | + self.prev_x = obs[0] |
| 139 | + obs[1] = self.prev_vel |
| 140 | + |
| 141 | + if iteration == 0: |
| 142 | + print(f'Iter: {iteration} - Take off.') |
| 143 | + height = z_height |
| 144 | + duration = 2 |
| 145 | + command_type = Command(2) # Take-off. |
| 146 | + args = [height, duration] |
| 147 | + elif iteration >= 2 * self.CTRL_FREQ and iteration < 3 * self.CTRL_FREQ: |
| 148 | + print(f'Iter: {iteration} - Re-centering at (x_eq, y_eq, z_height).') |
| 149 | + target_pos = np.array([x_eq, y_eq, z_height]) |
| 150 | + target_vel = np.zeros(3) |
| 151 | + target_acc = np.zeros(3) |
| 152 | + target_yaw = 0.0 |
| 153 | + target_rpy_rates = np.zeros(3) |
| 154 | + command_type = Command(1) # cmdFullState. |
| 155 | + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] |
| 156 | + elif iteration >= 3 * self.CTRL_FREQ and iteration < 23 * self.CTRL_FREQ: |
| 157 | + print(f'Iter: {iteration} - Executing Trajectory.') |
| 158 | + step = iteration - 5 * self.CTRL_FREQ |
| 159 | + info = {'current_step': step} |
| 160 | + curr_obs = np.atleast_2d(np.array([obs[0] - x_eq, obs[1]])).T # np.atleast_2d(np.array([obs[2]-y_eq, obs[3]])).T |
| 161 | + # new_act = -(-self.lqr_gain @ (curr_obs - self.full_trajectory[[step]].T))[0,0] |
| 162 | + new_act = self.ctrl.select_action(curr_obs, info)[0] |
| 163 | + new_act = np.clip(new_act, -0.25, 0.25) |
| 164 | + # new_act = -(self.full_trajectory[step, 0] - obs[0]) |
| 165 | + if CERTIFY is True: |
| 166 | + certified_action, success = self.safety_filter.certify_action(curr_obs, new_act, info) |
| 167 | + if success: |
| 168 | + self.corrections.append(certified_action - new_act) |
| 169 | + new_act = certified_action |
| 170 | + else: |
| 171 | + self.corrections.append(0.0) |
| 172 | + |
| 173 | + target_pos = np.array([new_act + obs[0], y_eq, z_height]) # np.array([x_eq, new_act + obs[2], z_height]) |
| 174 | + target_vel = np.zeros(3) |
| 175 | + target_acc = np.zeros(3) |
| 176 | + target_yaw = 0.0 |
| 177 | + target_rpy_rates = np.zeros(3) |
| 178 | + |
| 179 | + command_type = Command(1) # cmdFullState. |
| 180 | + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] |
| 181 | + |
| 182 | + self.recorded_obs.append(obs) |
| 183 | + self.actions.append(new_act) |
| 184 | + elif iteration >= 23 * self.CTRL_FREQ and iteration < 25 * self.CTRL_FREQ: |
| 185 | + print(f'Iter: {iteration} - Re-centering at (x_eq, y_eq, z_height).') |
| 186 | + target_pos = np.array([x_eq, y_eq, z_height]) |
| 187 | + target_vel = np.zeros(3) |
| 188 | + target_acc = np.zeros(3) |
| 189 | + target_yaw = 0.0 |
| 190 | + target_rpy_rates = np.zeros(3) |
| 191 | + command_type = Command(1) # cmdFullState. |
| 192 | + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] |
| 193 | + elif iteration == 25 * self.CTRL_FREQ: |
| 194 | + print(f'Iter: {iteration} - Setpoint Stop.') |
| 195 | + command_type = Command(6) # Notify setpoint stop. |
| 196 | + args = [] |
| 197 | + elif iteration == 25 * self.CTRL_FREQ + 1: |
| 198 | + print(f'Iter: {iteration} - Landing.') |
| 199 | + height = 0.1 |
| 200 | + duration = 2 |
| 201 | + command_type = Command(3) # Land. |
| 202 | + args = [height, duration] |
| 203 | + elif iteration == 27 * self.CTRL_FREQ: |
| 204 | + print(f'Iter: {iteration} - Terminating.') |
| 205 | + command_type = Command(-1) # Terminate. |
| 206 | + args = [] |
| 207 | + if save_data is True: |
| 208 | + if not CERTIFY: |
| 209 | + folder = 'uncert' |
| 210 | + else: |
| 211 | + folder = 'cert/' + sf_config['cost_function'] |
| 212 | + if sf_config['cost_function'] == 'precomputed_cost': |
| 213 | + folder += '/m' + str(sf_config['mpsc_cost_horizon']) |
| 214 | + np.save(f'/home/federico/GitHub/safe-control-gym/experiments/crazyflie/all_trajs/test{TEST}/{folder}/traj_goal.npy', self.full_trajectory) |
| 215 | + np.save(f'/home/federico/GitHub/safe-control-gym/experiments/crazyflie/all_trajs/test{TEST}/{folder}/states.npy', np.array(self.recorded_obs)) |
| 216 | + np.save(f'/home/federico/GitHub/safe-control-gym/experiments/crazyflie/all_trajs/test{TEST}/{folder}/actions.npy', np.array(self.actions)) |
| 217 | + np.save(f'/home/federico/GitHub/safe-control-gym/experiments/crazyflie/all_trajs/test{TEST}/{folder}/corrections.npy', np.array(self.corrections)) |
| 218 | + else: |
| 219 | + command_type = Command(0) # None. |
| 220 | + args = [] |
| 221 | + |
| 222 | + self.prev_obs = obs |
| 223 | + |
| 224 | + return command_type, args |
| 225 | + |
| 226 | + def setup_controllers(self): |
| 227 | + task_name = 'stab' if task_config['task'] == Task.STABILIZATION else 'track' |
| 228 | + |
| 229 | + task_config['gui'] = False |
| 230 | + env_func = partial(make, |
| 231 | + task, |
| 232 | + **task_config) |
| 233 | + |
| 234 | + # Setup controller. |
| 235 | + self.ctrl = make(algo, |
| 236 | + env_func, |
| 237 | + **algo_config) |
| 238 | + self.ctrl.gain = self.lqr_gain |
| 239 | + self.ctrl.model.U_EQ = 0 |
| 240 | + |
| 241 | + self.ctrl.env.X_GOAL = self.full_trajectory |
| 242 | + self.ctrl.env.TASK = Task.TRAJ_TRACKING |
| 243 | + |
| 244 | + if CERTIFY is True: |
| 245 | + # Setup MPSC. |
| 246 | + self.safety_filter = make(sf, |
| 247 | + env_func, |
| 248 | + **sf_config) |
| 249 | + self.safety_filter.reset() |
| 250 | + self.safety_filter.load(path=f'/home/federico/GitHub/safe-control-gym/experiments/crazyflie/models/mpsc_parameters/{sf}_crazyflie_{task_name}.pkl') |
| 251 | + |
| 252 | + if sf_config['cost_function'] == Cost_Function.PRECOMPUTED_COST: |
| 253 | + self.safety_filter.cost_function.uncertified_controller = self.ctrl |
| 254 | + self.safety_filter.cost_function.output_dir = '/home/federico/GitHub/safe-control-gym/experiments/crazyflie' |
| 255 | + self.safety_filter.env.X_GOAL = self.full_trajectory |
| 256 | + |
| 257 | + def reset(self): |
| 258 | + '''Reset. ''' |
| 259 | + self.recorded_obs = [] |
| 260 | + self.actions = [] |
| 261 | + self.corrections = [] |
| 262 | + self.prev_obs = np.zeros((12, 1)) |
| 263 | + self.setup_controllers() |
0 commit comments