From a693f439afe68ea2eea5073d47e4c97aaf4feb4e Mon Sep 17 00:00:00 2001 From: Jacopo Panerati Date: Sun, 19 Nov 2023 16:27:32 +0400 Subject: [PATCH] Action buffer in observations --- gym_pybullet_drones/envs/BaseRLAviary.py | 66 +++++++------- gym_pybullet_drones/envs/HoverAviary.py | 88 ------------------ .../envs/LeaderFollowerAviary.py | 89 ------------------- 3 files changed, 35 insertions(+), 208 deletions(-) diff --git a/gym_pybullet_drones/envs/BaseRLAviary.py b/gym_pybullet_drones/envs/BaseRLAviary.py index 0d9a38da1..a40f510ff 100644 --- a/gym_pybullet_drones/envs/BaseRLAviary.py +++ b/gym_pybullet_drones/envs/BaseRLAviary.py @@ -2,6 +2,7 @@ import numpy as np import pybullet as p from gymnasium import spaces +from collections import deque from gym_pybullet_drones.envs.BaseAviary import BaseAviary from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType, ImageType @@ -61,6 +62,10 @@ def __init__(self, The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.) """ + #### Create a buffer for the last 10 actions ############### + self.ACTION_BUFFER_SIZE = 10 + self.action_buffer = deque(maxlen=self.ACTION_BUFFER_SIZE) + #### vision_attributes = True if obs == ObservationType.RGB else False self.OBS_TYPE = obs self.ACT_TYPE = act @@ -145,6 +150,10 @@ def _actionSpace(self): exit() act_lower_bound = np.array([-1*np.ones(size) for i in range(self.NUM_DRONES)]) act_upper_bound = np.array([+1*np.ones(size) for i in range(self.NUM_DRONES)]) + # + for i in range(self.ACTION_BUFFER_SIZE): + self.action_buffer.append(np.zeros((self.NUM_DRONES,size))) + # return spaces.Box(low=act_lower_bound, high=act_upper_bound, dtype=np.float32) ################################################################################ @@ -176,6 +185,7 @@ def _preprocessAction(self, commanded to the 4 motors of each drone. """ + self.action_buffer.append(action) rpm = np.zeros((self.NUM_DRONES,4)) for k in range(action.shape[0]): target = action[k, :] @@ -246,16 +256,25 @@ def _observationSpace(self): shape=(self.NUM_DRONES, self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8) elif self.OBS_TYPE == ObservationType.KIN: ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3 - # obs_lower_bound = np.array([-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]) - # obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) - # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 ) - ############################################################ - #### OBS SPACE OF SIZE 12, W/O RPMS + #### OBS SPACE OF SIZE 12 #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ - obs_lower_bound = np.array([[-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1] for i in range(self.NUM_DRONES)]) - obs_upper_bound = np.array([[1,1,1, 1,1,1, 1,1,1, 1,1,1] for i in range(self.NUM_DRONES)]) + lo = -np.inf + hi = np.inf + obs_lower_bound = np.array([[lo,lo,0, lo,lo,lo,lo,lo,lo,lo,lo,lo] for i in range(self.NUM_DRONES)]) + obs_upper_bound = np.array([[hi,hi,hi,hi,hi,hi,hi,hi,hi,hi,hi,hi] for i in range(self.NUM_DRONES)]) + #### Add action buffer to observation space ################ + act_lo = -1 + act_hi = +1 + for i in range(self.ACTION_BUFFER_SIZE): + if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]: + obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])]) + obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])]) + elif self.ACT_TYPE==ActionType.PID: + obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])]) + obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])]) + elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]: + obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo] for i in range(self.NUM_DRONES)])]) + obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi] for i in range(self.NUM_DRONES)])]) return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32) ############################################################ else: @@ -287,33 +306,18 @@ def _computeObs(self): ) return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]).astype('float32') elif self.OBS_TYPE == ObservationType.KIN: - ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - # return np.array([ self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) ]).astype('float32') ############################################################ #### OBS SPACE OF SIZE 12 obs_12 = np.zeros((self.NUM_DRONES,12)) for i in range(self.NUM_DRONES): - obs = self._clipAndNormalizeState(self._getDroneStateVector(i)) + #obs = self._clipAndNormalizeState(self._getDroneStateVector(i)) + obs = self._getDroneStateVector(i) obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,) - return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32') + ret = np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32') + #### Add action buffer to observation ####################### + for i in range(self.ACTION_BUFFER_SIZE): + ret = np.hstack([ret, np.array([self.action_buffer[i][j, :] for j in range(self.NUM_DRONES)])]) + return ret ############################################################ else: print("[ERROR] in BaseRLAviary._computeObs()") - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Must be implemented in a subclass. - - Parameters - ---------- - state : ndarray - Array containing the non-normalized state of a single drone. - - """ - raise NotImplementedError diff --git a/gym_pybullet_drones/envs/HoverAviary.py b/gym_pybullet_drones/envs/HoverAviary.py index c66388570..737decf1d 100644 --- a/gym_pybullet_drones/envs/HoverAviary.py +++ b/gym_pybullet_drones/envs/HoverAviary.py @@ -124,91 +124,3 @@ def _computeInfo(self): """ return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. - - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. - - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12])) diff --git a/gym_pybullet_drones/envs/LeaderFollowerAviary.py b/gym_pybullet_drones/envs/LeaderFollowerAviary.py index 27865b024..336206328 100644 --- a/gym_pybullet_drones/envs/LeaderFollowerAviary.py +++ b/gym_pybullet_drones/envs/LeaderFollowerAviary.py @@ -80,7 +80,6 @@ def _computeReward(self): The reward. """ - rewards = np.zeros(self.NUM_DRONES) states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)]) ret = max(0, 500 - np.linalg.norm(self.TARGET_POS-states[0, 0:3])**2) for i in range(1, self.NUM_DRONES): @@ -134,91 +133,3 @@ def _computeInfo(self): """ return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. - - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. - - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12]))