From 0d9c7807197cef60993f2fb30b2c5e1ff71c76cb Mon Sep 17 00:00:00 2001
From: Shambhuraj Sawant
Date: Tue, 30 Jul 2024 13:04:00 +0200
Subject: [PATCH] Small fix for disturbance in quad

---
 .../ppo_quadrotor_2D_attitude.yaml        | 10 +++----
 .../quadrotor_2D_attitude_track.yaml      | 27 +++++++++++--------
 .../sac_quadrotor_2D_attitude.yaml        | 10 +++----
 .../envs/gym_pybullet_drones/quadrotor.py |  5 ++--
 4 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml
index 460daf3fb..d0a5cd8f6 100644
--- a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml
@@ -14,15 +14,15 @@ algo_config:
   critic_lr: 0.001

   # runner args
-  max_env_steps: 480000
+  max_env_steps: 540000
   rollout_batch_size: 4
-  rollout_steps: 1000
+  rollout_steps: 540
   eval_batch_size: 50

   # misc
-  log_interval: 8000
-  save_interval: 0
+  log_interval: 10800
+  save_interval: 540000
   num_checkpoints: 0
-  eval_interval: 8000
+  eval_interval: 10800
   eval_save_best: True
   tensorboard: False

diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
index 45015a868..5f8bb1b11 100644
--- a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
@@ -5,12 +5,12 @@ task_config:
   pyb_freq: 1200
   physics: pyb
   quad_type: 4
-  normalized_rl_action_space: True
+  normalized_rl_action_space: False

   init_state:
     init_x: 0
     init_x_dot: 0
-    init_z: 1.15
+    init_z: 1.0
     init_z_dot: 0
     init_theta: 0
     init_theta_dot: 0
@@ -20,20 +20,20 @@ task_config:
   init_state_randomization_info:
     init_x:
       distrib: 'uniform'
-      low: -0.01
-      high: 0.01
+      low: -0.02
+      high: 0.02
     init_x_dot:
       distrib: 'uniform'
-      low: -0.01
-      high: 0.01
+      low: -0.02
+      high: 0.02
     init_z:
       distrib: 'uniform'
-      low: -0.01
-      high: 0.01
+      low: -0.02
+      high: 0.02
     init_z_dot:
       distrib: 'uniform'
-      low: -0.01
-      high: 0.01
+      low: -0.02
+      high: 0.02
     init_theta:
       distrib: 'uniform'
       low: -0.02
       high: 0.02
@@ -60,10 +60,15 @@ task_config:
   obs_goal_horizon: 1

   # RL Reward
-  rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.1, 0.1]
+  rew_state_weight: [10.0, 0.1, 10.0, 0.1, 0.1, 0.001]
   rew_act_weight: 0.1
   rew_exponential: True

+  disturbances:
+    observation:
+      - disturbance_func: white_noise
+        std: [0.02, 0.02, 0.04, 0.04, 0.04, 0.1, 0., 0., 0., 0., 0., 0.]
+
   # constraints:
   # - constraint_form: default_constraint
   #   constrained_variable: state
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/sac_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/sac_quadrotor_2D_attitude.yaml
index 2e7f2db4e..b43cb5cc4 100644
--- a/examples/rl/config_overrides/quadrotor_2D_attitude/sac_quadrotor_2D_attitude.yaml
+++ b/examples/rl/config_overrides/quadrotor_2D_attitude/sac_quadrotor_2D_attitude.yaml
@@ -12,18 +12,18 @@ algo_config:
   entropy_lr: 0.001

   # runner args
-  max_env_steps: 200000
+  max_env_steps: 540000
   warm_up_steps: 1000
   rollout_batch_size: 4
   num_workers: 1
-  max_buffer_size: 50000
+  max_buffer_size: 54000
   deque_size: 50
   eval_batch_size: 50

   # misc
-  log_interval: 4000
-  save_interval: 0
+  log_interval: 10800
+  save_interval: 540000
   num_checkpoints: 0
-  eval_interval: 4000
+  eval_interval: 10800
   eval_save_best: True
   tensorboard: False
diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
index 6c0ba4a92..f695dd241 100644
--- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
+++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py
@@ -1023,8 +1023,6 @@ def _get_observation(self):

         # Apply observation disturbance.
         obs = deepcopy(self.state)
-        if 'observation' in self.disturbances:
-            obs = self.disturbances['observation'].apply(obs, self)

         # Concatenate goal info (references state(s)) for RL.
         # Plus two because ctrl_step_counter has not incremented yet, and we want to return the obs (which would be
@@ -1034,6 +1032,9 @@
             obs = self.extend_obs(obs, 1)
         else:
             obs = self.extend_obs(obs, self.ctrl_step_counter + 2)
+
+        if 'observation' in self.disturbances:
+            obs = self.disturbances['observation'].apply(obs, self)
         return obs

     def _get_reward(self):
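
Note on the reordering in quadrotor.py: the white-noise observation disturbance added in quadrotor_2D_attitude_track.yaml is sized for the goal-extended observation (6 state entries plus 6 reference entries from obs_goal_horizon: 1, with the reference entries given zero noise), so it has to be applied after extend_obs rather than to the raw 6-dimensional state. The following is only a minimal sketch of that sizing; extend_obs_sketch and apply_white_noise are hypothetical stand-ins, not the safe_control_gym API.

import numpy as np

# Noise levels copied from the new `disturbances` block: the first 6 entries
# perturb the current state, the last 6 leave the goal reference untouched.
STD = np.array([0.02, 0.02, 0.04, 0.04, 0.04, 0.1, 0., 0., 0., 0., 0., 0.])

def extend_obs_sketch(state, goal):
    # Mimics the env concatenating the reference state(s) onto the observation.
    return np.concatenate([state, goal])

def apply_white_noise(obs, std, rng=None):
    # Hypothetical stand-in for the observation disturbance: additive Gaussian noise.
    rng = rng or np.random.default_rng()
    return obs + rng.normal(0.0, std)

state = np.zeros(6)                              # [x, x_dot, z, z_dot, theta, theta_dot]
goal = np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0])  # hover reference at z = 1.0
obs = apply_white_noise(extend_obs_sketch(state, goal), STD)  # 12-dim; noise on the state part only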