From dd95218674cd27ae1ab63e8add0392eb1a878146 Mon Sep 17 00:00:00 2001 From: Jacopo Panerati Date: Sun, 15 Oct 2023 17:37:02 +0400 Subject: [PATCH] Simplified RL aviaries --- README.md | 5 +- gym_pybullet_drones/__init__.py | 14 +- ...aseMultiagentAviary.py => BaseRLAviary.py} | 34 +- .../envs/{single_agent_rl => }/HoverAviary.py | 7 +- .../LeaderFollowerAviary.py | 21 +- gym_pybullet_drones/envs/RLAviary.py | 2 - gym_pybullet_drones/envs/__init__.py | 3 + .../envs/multi_agent_rl/FlockAviary.py | 265 --------------- .../envs/multi_agent_rl/__init__.py | 3 - .../single_agent_rl/BaseSingleAgentAviary.py | 305 ------------------ .../envs/single_agent_rl/FlyThruGateAviary.py | 240 -------------- .../envs/single_agent_rl/__init__.py | 3 - gym_pybullet_drones/examples/learn.py | 37 ++- 13 files changed, 59 insertions(+), 880 deletions(-) rename gym_pybullet_drones/envs/{multi_agent_rl/BaseMultiagentAviary.py => BaseRLAviary.py} (92%) rename gym_pybullet_drones/envs/{single_agent_rl => }/HoverAviary.py (97%) rename gym_pybullet_drones/envs/{multi_agent_rl => }/LeaderFollowerAviary.py (93%) delete mode 100644 gym_pybullet_drones/envs/RLAviary.py delete mode 100644 gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py delete mode 100644 gym_pybullet_drones/envs/multi_agent_rl/__init__.py delete mode 100644 gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py delete mode 100644 gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py delete mode 100644 gym_pybullet_drones/envs/single_agent_rl/__init__.py diff --git a/README.md b/README.md index c3829e2a9..0dbd807ed 100644 --- a/README.md +++ b/README.md @@ -99,10 +99,11 @@ If you wish, please cite our [IROS 2021 paper](https://arxiv.org/abs/2103.02142) ## TODO - [ ] Add `crazyflie-firmware` SITL support @spencerteetaert + +## DESIDERATA + - [ ] Add motor delay @JacopoPan / @spencerteetaert - [ ] Replace `rpy` with quaternions (and `ang_vel` with body rates) in `obs` @JacopoPan -- [ ] Replace `BaseSingleAgentAviary` and `BaseMultiAgentAviary` with a single `RLAviary`, incl. 
PR #161 @JacopoPan -- [ ] Add a multi-agent MDP with 2-drone chase through a gate @JacopoPan ----- > University of Toronto's [Dynamic Systems Lab](https://github.com/utiasDSL) / [Vector Institute](https://github.com/VectorInstitute) / University of Cambridge's [Prorok Lab](https://github.com/proroklab) diff --git a/gym_pybullet_drones/__init__.py b/gym_pybullet_drones/__init__.py index 365cbfaab..09b80fc51 100644 --- a/gym_pybullet_drones/__init__.py +++ b/gym_pybullet_drones/__init__.py @@ -12,20 +12,10 @@ register( id='hover-aviary-v0', - entry_point='gym_pybullet_drones.envs.single_agent_rl:HoverAviary', -) - -register( - id='flythrugate-aviary-v0', - entry_point='gym_pybullet_drones.envs.single_agent_rl:FlyThruGateAviary', -) - -register( - id='flock-aviary-v0', - entry_point='gym_pybullet_drones.envs.multi_agent_rl:FlockAviary', + entry_point='gym_pybullet_drones.envs:HoverAviary', ) register( id='leaderfollower-aviary-v0', - entry_point='gym_pybullet_drones.envs.multi_agent_rl:LeaderFollowerAviary', + entry_point='gym_pybullet_drones.envs:LeaderFollowerAviary', ) diff --git a/gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py b/gym_pybullet_drones/envs/BaseRLAviary.py similarity index 92% rename from gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py rename to gym_pybullet_drones/envs/BaseRLAviary.py index 40415449a..865baed20 100644 --- a/gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py +++ b/gym_pybullet_drones/envs/BaseRLAviary.py @@ -7,14 +7,14 @@ from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType, ImageType from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl -class BaseMultiagentAviary(BaseAviary): - """Base multi-agent environment class for reinforcement learning.""" +class BaseRLAviary(BaseAviary): + """Base single and multi-agent environment class for reinforcement learning.""" ################################################################################ def __init__(self, drone_model: DroneModel=DroneModel.CF2X, - num_drones: int=2, + num_drones: int=1, neighbourhood_radius: float=np.inf, initial_xyzs=None, initial_rpys=None, @@ -26,7 +26,7 @@ def __init__(self, obs: ObservationType=ObservationType.KIN, act: ActionType=ActionType.RPM ): - """Initialization of a generic multi-agent RL environment. + """Initialization of a generic single and multi-agent RL environment. Attributes `vision_attributes` and `dynamics_attributes` are selected based on the choice of `obs` and `act`; `obstacles` is set to True @@ -61,9 +61,6 @@ def __init__(self, The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.) 
""" - if num_drones < 2: - print("[ERROR] in BaseMultiagentAviary.__init__(), num_drones should be >= 2") - exit() vision_attributes = True if obs == ObservationType.RGB else False self.OBS_TYPE = obs self.ACT_TYPE = act @@ -74,7 +71,7 @@ def __init__(self, if drone_model in [DroneModel.CF2X, DroneModel.CF2P]: self.ctrl = [DSLPIDControl(drone_model=DroneModel.CF2X) for i in range(num_drones)] else: - print("[ERROR] in BaseMultiagentAviary.__init()__, no controller is available for the specified drone_model") + print("[ERROR] in BaseRLAviary.__init()__, no controller is available for the specified drone_model") super().__init__(drone_model=drone_model, num_drones=num_drones, neighbourhood_radius=neighbourhood_radius, @@ -144,7 +141,7 @@ def _actionSpace(self): elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]: size = 1 else: - print("[ERROR] in BaseMultiagentAviary._actionSpace()") + print("[ERROR] in BaseRLAviary._actionSpace()") exit() act_lower_bound = np.array([-1*np.ones(size) for i in range(self.NUM_DRONES)]) act_upper_bound = np.array([+1*np.ones(size) for i in range(self.NUM_DRONES)]) @@ -228,7 +225,7 @@ def _preprocessAction(self, ) rpm[k,:] = rpm else: - print("[ERROR] in BaseMultiagentAviary._preprocessAction()") + print("[ERROR] in BaseRLAviary._preprocessAction()") exit() return rpm @@ -244,10 +241,6 @@ def _observationSpace(self): """ if self.OBS_TYPE == ObservationType.RGB: - # return spaces.Dict({i: spaces.Box(low=0, - # high=255, - # shape=(self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8 - # ) for i in range(self.NUM_DRONES)}) return spaces.Box(low=0, high=255, shape=(self.NUM_DRONES, self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8) @@ -259,13 +252,14 @@ def _observationSpace(self): # obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 ) ############################################################ - #### OBS SPACE OF SIZE 12 + #### OBS SPACE OF SIZE 12, W/O RPMS + #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ obs_lower_bound = np.array([[-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1] for i in range(self.NUM_DRONES)]) obs_upper_bound = np.array([[1,1,1, 1,1,1, 1,1,1, 1,1,1] for i in range(self.NUM_DRONES)]) return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32) ############################################################ else: - print("[ERROR] in BaseMultiagentAviary._observationSpace()") + print("[ERROR] in BaseRLAviary._observationSpace()") ################################################################################ @@ -291,21 +285,21 @@ def _computeObs(self): path=self.ONBOARD_IMG_PATH+"drone_"+str(i), frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ) ) - return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]) + return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]).astype('float32') elif self.OBS_TYPE == ObservationType.KIN: ############################################################ #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - # return { i : self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) } + # return np.array([ self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) ]).astype('float32') ############################################################ #### OBS SPACE OF SIZE 12 obs_12 = np.zeros((self.NUM_DRONES,12)) for i in range(self.NUM_DRONES): obs = 
self._clipAndNormalizeState(self._getDroneStateVector(i)) obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,) - return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]) + return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32') ############################################################ else: - print("[ERROR] in BaseMultiagentAviary._computeObs()") + print("[ERROR] in BaseRLAviary._computeObs()") ################################################################################ diff --git a/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py b/gym_pybullet_drones/envs/HoverAviary.py similarity index 97% rename from gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py rename to gym_pybullet_drones/envs/HoverAviary.py index 96fd10c2a..9908f3f50 100644 --- a/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py +++ b/gym_pybullet_drones/envs/HoverAviary.py @@ -1,9 +1,9 @@ import numpy as np -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary +from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary +from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType -class HoverAviary(BaseSingleAgentAviary): +class HoverAviary(BaseRLAviary): """Single agent RL problem: hover at position.""" ################################################################################ @@ -49,6 +49,7 @@ def __init__(self, """ super().__init__(drone_model=drone_model, + num_drones=1, initial_xyzs=initial_xyzs, initial_rpys=initial_rpys, physics=physics, diff --git a/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py b/gym_pybullet_drones/envs/LeaderFollowerAviary.py similarity index 93% rename from gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py rename to gym_pybullet_drones/envs/LeaderFollowerAviary.py index 4722ac2aa..67d83c0cc 100644 --- a/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py +++ b/gym_pybullet_drones/envs/LeaderFollowerAviary.py @@ -1,10 +1,9 @@ import numpy as np -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary +from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary +from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType -class LeaderFollowerAviary(BaseMultiagentAviary): +class LeaderFollowerAviary(BaseRLAviary): """Multi-agent RL problem: leader-follower.""" ################################################################################ @@ -79,13 +78,13 @@ def _computeReward(self): The reward value for each drone. 
""" - rewards = {} + rewards = np.zeros(self.NUM_DRONES) states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)]) rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 0.5]) - states[0, 0:3])**2 - # rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD + # rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD for i in range(1, self.NUM_DRONES): - rewards[i] = -(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2 - return rewards + rewards[i] = (-(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2) + return rewards[0] #TODO: return multiple rewards ################################################################################ @@ -100,9 +99,9 @@ def _computeTerminated(self): """ bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False - done = {i: bool_val for i in range(self.NUM_DRONES)} - done["__all__"] = bool_val # True if True in done.values() else False - return done + # done = {i: bool_val for i in range(self.NUM_DRONES)} + # done["__all__"] = bool_val # True if True in done.values() else False + return bool_val #TODO: return multiple terminatation values ################################################################################ diff --git a/gym_pybullet_drones/envs/RLAviary.py b/gym_pybullet_drones/envs/RLAviary.py deleted file mode 100644 index bbf5bdb4b..000000000 --- a/gym_pybullet_drones/envs/RLAviary.py +++ /dev/null @@ -1,2 +0,0 @@ -'''TBD -''' diff --git a/gym_pybullet_drones/envs/__init__.py b/gym_pybullet_drones/envs/__init__.py index d630927c1..c387403ee 100644 --- a/gym_pybullet_drones/envs/__init__.py +++ b/gym_pybullet_drones/envs/__init__.py @@ -1,2 +1,5 @@ +from gym_pybullet_drones.envs.BetaAviary import BetaAviary from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary +from gym_pybullet_drones.envs.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.LeaderFollowerAviary import LeaderFollowerAviary from gym_pybullet_drones.envs.VelocityAviary import VelocityAviary diff --git a/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py b/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py deleted file mode 100644 index b28f8accd..000000000 --- a/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py +++ /dev/null @@ -1,265 +0,0 @@ -import math -import numpy as np - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary - -class FlockAviary(BaseMultiagentAviary): - """Multi-agent RL problem: flocking.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - num_drones: int=2, - neighbourhood_radius: float=np.inf, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM): - """Initialization of a multi-agent RL environment. - - Using the generic multi-agent RL superclass. 
- - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - num_drones : int, optional - The desired number of drones in the aviary. - neighbourhood_radius : float, optional - Radius used to compute the drones' adjacency matrix, in meters. - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) - - """ - super().__init__(drone_model=drone_model, - num_drones=num_drones, - neighbourhood_radius=neighbourhood_radius, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obs=obs, - act=act - ) - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value(s). - - Returns - ------- - dict[int, float] - The reward value for each drone. - - """ - rewards = {} - states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)]) - rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 1]) - states[0, 0:3])**2 - for i in range(1, self.NUM_DRONES): - rewards[i] = -1 * np.linalg.norm(states[i-1, 2] - states[i, 2])**2 - return rewards - """ - # obs here is dictionary of the form {"i":{"state": Box(20,), "neighbors": MultiBinary(NUM_DRONES)}} - # parse velocity and position - vel = np.zeros((1, self.NUM_DRONES, 3)); pos = np.zeros((1, self.NUM_DRONES, 3)) - for i in range(self.NUM_DRONES): - pos[0,i,:] = obs[ i ]["state"][0:3] - vel[0,i,:] = obs[ i ]["state"][10:13] - # compute metrics - # velocity alignment - ali = 0 - EPSILON = 1e-3 # avoid divide by zero - linear_vel_norm = np.linalg.norm(vel, axis=2) - for i in range(self.NUM_DRONES): - for j in range(self.NUM_DRONES): - if j != i: - d = np.einsum('ij,ij->i', vel[:, i, :], vel[:, j, :]) - ali += (d / (linear_vel_norm[:, i] + EPSILON) / (linear_vel_norm[:, j] + EPSILON)) - ali /= (self.NUM_DRONES * (self.NUM_DRONES - 1)) - # flocking speed - cof_v = np.mean(vel, axis=1) # center of flock speed - avg_flock_linear_speed = np.linalg.norm(cof_v, axis=-1) - # spacing - whole_flock_spacing = [] - for i in range(self.NUM_DRONES): - flck_neighbor_pos = np.delete(pos, [i], 1) - drone_neighbor_pos_diff = flck_neighbor_pos - np.reshape(pos[:, i, :], (pos[:, i, :].shape[0], 1, -1)) - drone_neighbor_dis = np.linalg.norm(drone_neighbor_pos_diff, axis=-1) - drone_spacing = np.amin(drone_neighbor_dis, axis=-1) - whole_flock_spacing.append(drone_spacing) - whole_flock_spacing = np.stack(whole_flock_spacing, axis=-1) - avg_flock_spacing = np.mean(whole_flock_spacing, axis=-1) - var_flock_spacing = np.var(whole_flock_spacing, axis=-1) - # flocking metrics - 
FLOCK_SPACING_MIN = 1.0; FLOCK_SPACING_MAX = 3.0 - if FLOCK_SPACING_MIN < avg_flock_spacing[0] < FLOCK_SPACING_MAX: - avg_flock_spac_rew = 0.0 - else: - avg_flock_spac_rew = min(math.fabs(avg_flock_spacing[0] - FLOCK_SPACING_MIN), - math.fabs(avg_flock_spacing[0] - FLOCK_SPACING_MAX)) - reward = ali[0] + avg_flock_linear_speed[0] - avg_flock_spac_rew - var_flock_spacing[0] - return { i : reward for i in range(self.NUM_DRONES) } - """ - - ################################################################################ - - def _computeTerminated(self): - """Computes the current done value(s). - - Returns - ------- - dict[int | "__all__", bool] - Dictionary with the done value of each drone and - one additional boolean value for key "__all__". - - """ - bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False - done = {i: bool_val for i in range(self.NUM_DRONES)} - done["__all__"] = True if True in done.values() else False - return done - - ################################################################################ - - def _computeTruncated(self): - """Computes the current truncated value(s). - - Unused in this implementation. - - Returns - ------- - bool - Always false. - - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused. - - Returns - ------- - dict[int, dict[]] - Dictionary of empty dictionaries. - - """ - return {i: {} for i in range(self.NUM_DRONES)} - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. 
- - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. - - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12])) diff --git a/gym_pybullet_drones/envs/multi_agent_rl/__init__.py b/gym_pybullet_drones/envs/multi_agent_rl/__init__.py deleted file mode 100644 index 09197c8d5..000000000 --- a/gym_pybullet_drones/envs/multi_agent_rl/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary -from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary -from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary diff --git a/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py b/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py deleted file mode 100644 index 5b4745cb3..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py +++ /dev/null @@ -1,305 +0,0 @@ -import os -from enum import Enum -import numpy as 
np -from gymnasium import spaces -import pybullet as p - -from gym_pybullet_drones.envs.BaseAviary import BaseAviary -from gym_pybullet_drones.utils.enums import DroneModel, Physics, ImageType, ActionType, ObservationType -from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl - - -class BaseSingleAgentAviary(BaseAviary): - """Base single drone environment class for reinforcement learning.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM - ): - """Initialization of a generic single agent RL environment. - - Attribute `num_drones` is automatically set to 1; `vision_attributes` - are selected based on the choice of `obs` and `act`; `obstacles` is - set to True and overridden with landmarks for vision applications; - `user_debug_gui` is set to False for performance. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.) - - """ - vision_attributes = True if obs == ObservationType.RGB else False - self.OBS_TYPE = obs - self.ACT_TYPE = act - self.EPISODE_LEN_SEC = 5 - #### Create integrated controllers ######################### - if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID]: - os.environ['KMP_DUPLICATE_LIB_OK']='True' - if drone_model in [DroneModel.CF2X, DroneModel.CF2P]: - self.ctrl = DSLPIDControl(drone_model=DroneModel.CF2X) - else: - print("[ERROR] in BaseSingleAgentAviary.__init()__, no controller is available for the specified drone_model") - super().__init__(drone_model=drone_model, - num_drones=1, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obstacles=True, # Add obstacles for RGB observations and/or FlyThruGate - user_debug_gui=False, # Remove of RPM sliders from all single agent learning aviaries - vision_attributes=vision_attributes, - ) - #### Set a limit on the maximum target speed ############### - if act == ActionType.VEL: - self.SPEED_LIMIT = 0.03 * self.MAX_SPEED_KMH * (1000/3600) - - ################################################################################ - - def _addObstacles(self): - """Add obstacles to the environment. - - Only if the observation is of type RGB, 4 landmarks are added. - Overrides BaseAviary's method. 
- - """ - if self.OBS_TYPE == ObservationType.RGB: - p.loadURDF("block.urdf", - [1, 0, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("cube_small.urdf", - [0, 1, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("duck_vhacd.urdf", - [-1, 0, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("teddy_vhacd.urdf", - [0, -1, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - else: - pass - - ################################################################################ - - def _actionSpace(self): - """Returns the action space of the environment. - - Returns - ------- - ndarray - A Box() of size 1, 3, 4, or 6 depending on the action type. - - """ - if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]: - size = 4 - elif self.ACT_TYPE == ActionType.PID: - size = 3 - elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]: - size = 1 - else: - print("[ERROR] in BaseSingleAgentAviary._actionSpace()") - exit() - return spaces.Box(low=-1*np.ones(size), - # return spaces.Box(low=np.zeros(size), # Alternative action space, see PR #32 - high=np.ones(size), - dtype=np.float32 - ) - - ################################################################################ - - def _preprocessAction(self, - action - ): - """Pre-processes the action passed to `.step()` into motors' RPMs. - - Parameter `action` is processed differenly for each of the different - action types: `action` can be of length 1, 3, 4, or 6 and represent - RPMs, desired thrust and torques, the next target position to reach - using PID control, a desired velocity vector, new PID coefficients, etc. - - Parameters - ---------- - action : ndarray - The input action for each drone, to be translated into RPMs. - - Returns - ------- - ndarray - (4,)-shaped array of ints containing to clipped RPMs - commanded to the 4 motors of each drone. 
- - """ - if self.ACT_TYPE == ActionType.RPM: - return np.array(self.HOVER_RPM * (1+0.05*action)) - elif self.ACT_TYPE == ActionType.PID: - state = self._getDroneStateVector(0) - next_pos = self._calculateNextStep( - current_position=state[0:3], - destination=action, - step_size=1, - ) - rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP, - cur_pos=state[0:3], - cur_quat=state[3:7], - cur_vel=state[10:13], - cur_ang_vel=state[13:16], - target_pos=next_pos - ) - return rpm - elif self.ACT_TYPE == ActionType.VEL: - state = self._getDroneStateVector(0) - if np.linalg.norm(action[0:3]) != 0: - v_unit_vector = action[0:3] / np.linalg.norm(action[0:3]) - else: - v_unit_vector = np.zeros(3) - rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP, - cur_pos=state[0:3], - cur_quat=state[3:7], - cur_vel=state[10:13], - cur_ang_vel=state[13:16], - target_pos=state[0:3], # same as the current position - target_rpy=np.array([0,0,state[9]]), # keep current yaw - target_vel=self.SPEED_LIMIT * np.abs(action[3]) * v_unit_vector # target the desired velocity vector - ) - return rpm - elif self.ACT_TYPE == ActionType.ONE_D_RPM: - return np.repeat(self.HOVER_RPM * (1+0.05*action), 4) - elif self.ACT_TYPE == ActionType.ONE_D_PID: - state = self._getDroneStateVector(0) - rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP, - cur_pos=state[0:3], - cur_quat=state[3:7], - cur_vel=state[10:13], - cur_ang_vel=state[13:16], - target_pos=state[0:3]+0.1*np.array([0,0,action[0]]) - ) - return rpm - else: - print("[ERROR] in BaseSingleAgentAviary._preprocessAction()") - - ################################################################################ - - def _observationSpace(self): - """Returns the observation space of the environment. - - Returns - ------- - ndarray - A Box() of shape (H,W,4) or (12,) depending on the observation type. - - """ - if self.OBS_TYPE == ObservationType.RGB: - return spaces.Box(low=0, - high=255, - shape=(self.IMG_RES[1], self.IMG_RES[0], 4), - dtype=np.uint8 - ) - elif self.OBS_TYPE == ObservationType.KIN: - ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3 - # obs_lower_bound = np.array([-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]) - # obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) - # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 ) - ############################################################ - #### OBS SPACE OF SIZE 12 - return spaces.Box(low=np.array([-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1]), - high=np.array([1,1,1, 1,1,1, 1,1,1, 1,1,1]), - dtype=np.float32 - ) - ############################################################ - else: - print("[ERROR] in BaseSingleAgentAviary._observationSpace()") - - ################################################################################ - - def _computeObs(self): - """Returns the current observation of the environment. - - Returns - ------- - ndarray - A Box() of shape (H,W,4) or (12,) depending on the observation type. 
- - """ - if self.OBS_TYPE == ObservationType.RGB: - if self.step_counter%self.IMG_CAPTURE_FREQ == 0: - self.rgb[0], self.dep[0], self.seg[0] = self._getDroneImages(0, - segmentation=False - ) - #### Printing observation to PNG frames example ############ - if self.RECORD: - self._exportImage(img_type=ImageType.RGB, - img_input=self.rgb[0], - path=self.ONBOARD_IMG_PATH, - frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ) - ) - return self.rgb[0] - elif self.OBS_TYPE == ObservationType.KIN: - obs = self._clipAndNormalizeState(self._getDroneStateVector(0)) - ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - # return obs - ############################################################ - #### OBS SPACE OF SIZE 12 - ret = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,) - return ret.astype('float32') - ############################################################ - else: - print("[ERROR] in BaseSingleAgentAviary._computeObs()") - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Must be implemented in a subclass. - - Parameters - ---------- - state : ndarray - Array containing the non-normalized state of a single drone. - - """ - raise NotImplementedError diff --git a/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py b/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py deleted file mode 100644 index f1b072b20..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py +++ /dev/null @@ -1,240 +0,0 @@ -import os -import numpy as np -import pybullet as p -import pkg_resources - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary - - -class FlyThruGateAviary(BaseSingleAgentAviary): - """Single agent RL problem: fly through a gate.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM - ): - """Initialization of a single agent RL environment. - - Using the generic single agent RL superclass. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. 
- obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) - - """ - super().__init__(drone_model=drone_model, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obs=obs, - act=act - ) - - ################################################################################ - - def _addObstacles(self): - """Add obstacles to the environment. - - Extends the superclass method and add the gate build of cubes and an architrave. - - """ - super()._addObstacles() - p.loadURDF(pkg_resources.resource_filename('gym_pybullet_drones', 'assets/architrave.urdf'), - [0, -1, .55], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - for i in range(10): - p.loadURDF("cube_small.urdf", - [-.3, -1, .02+i*0.05], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("cube_small.urdf", - [.3, -1, .02+i*0.05], - p.getQuaternionFromEuler([0,0,0]), - physicsClientId=self.CLIENT - ) - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value. - - Returns - ------- - float - The reward. - - """ - state = self._getDroneStateVector(0) - norm_ep_time = (self.step_counter/self.PYB_FREQ) / self.EPISODE_LEN_SEC - return -10 * np.linalg.norm(np.array([0, -2*norm_ep_time, 0.75])-state[0:3])**2 - - ################################################################################ - - def _computeTerminated(self): - """Computes the current done value. - - Returns - ------- - bool - Whether the current episode is done. - - """ - if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC: - return True - else: - return False - - ################################################################################ - - def _computeTruncated(self): - """Computes the current truncated value(s). - - Unused in this implementation. - - Returns - ------- - bool - Always false. - - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused. - - Returns - ------- - dict[str, int] - Dummy value. - - """ - return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. 
- - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. - - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12])) diff --git a/gym_pybullet_drones/envs/single_agent_rl/__init__.py b/gym_pybullet_drones/envs/single_agent_rl/__init__.py deleted file mode 100644 index b50f296b4..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import BaseSingleAgentAviary -from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary -from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary diff --git a/gym_pybullet_drones/examples/learn.py b/gym_pybullet_drones/examples/learn.py index 97c69d768..ecfef73e6 100644 --- a/gym_pybullet_drones/examples/learn.py +++ b/gym_pybullet_drones/examples/learn.py @@ -21,7 +21,8 @@ from stable_baselines3 import PPO from gym_pybullet_drones.utils.Logger import Logger -from 
gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.LeaderFollowerAviary import LeaderFollowerAviary from gym_pybullet_drones.utils.utils import sync, str2bool DEFAULT_GUI = True @@ -32,12 +33,13 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB, record_video=DEFAULT_RECORD_VIDEO): #### Check the environment's spaces ######################## - env = gym.make("hover-aviary-v0") - print("[INFO] Action space:", env.action_space) - print("[INFO] Observation space:", env.observation_space) + # env = gym.make('hover-aviary-v0') + env = gym.make('leaderfollower-aviary-v0') + print('[INFO] Action space:', env.action_space) + print('[INFO] Observation space:', env.observation_space) #### Train the model ####################################### - model = PPO("MlpPolicy", + model = PPO('MlpPolicy', env, verbose=1 ) @@ -59,9 +61,16 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D deterministic=True ) obs, reward, terminated, truncated, info = env.step(action) + obs2 = obs.squeeze() + act2 = action.squeeze() + print("Obs:", obs, "\tAction", action, "\tReward:", reward, "\tTerminated:", terminated, "\tTruncated:", truncated) logger.log(drone=0, timestamp=i/env.CTRL_FREQ, - state=np.hstack([obs[0:3], np.zeros(4), obs[3:15], np.resize(action, (4))]), + state=np.hstack([obs2[0:3], + np.zeros(4), + obs2[3:15], + act2 + ]), control=np.zeros(12) ) env.render() @@ -74,7 +83,7 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D if plot: logger.plot() -if __name__ == "__main__": +if __name__ == '__main__': #### Define and parse (optional) arguments for the script ## parser = argparse.ArgumentParser(description='Single agent reinforcement learning example script using HoverAviary') parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='') @@ -293,32 +302,32 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D # ) # #### Create eveluation environment ######################### -# if obs == ObservationType.KIN: +# if obs == ObservationType.KIN: # eval_env = gym.make(env_name, # aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, # obs=obs, # act=act # ) # elif obs == ObservationType.RGB: -# if env_name == "takeoff-aviary-v0": +# if env_name == "takeoff-aviary-v0": # eval_env = make_vec_env(TakeoffAviary, # env_kwargs=sa_env_kwargs, # n_envs=1, # seed=0 # ) -# if env_name == "hover-aviary-v0": +# if env_name == "hover-aviary-v0": # eval_env = make_vec_env(HoverAviary, # env_kwargs=sa_env_kwargs, # n_envs=1, # seed=0 # ) -# if env_name == "flythrugate-aviary-v0": +# if env_name == "flythrugate-aviary-v0": # eval_env = make_vec_env(FlyThruGateAviary, # env_kwargs=sa_env_kwargs, # n_envs=1, # seed=0 # ) -# if env_name == "tune-aviary-v0": +# if env_name == "tune-aviary-v0": # eval_env = make_vec_env(TuneAviary, # env_kwargs=sa_env_kwargs, # n_envs=1, @@ -362,8 +371,8 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D # parser.add_argument('--algo', default=DEFAULT_ALGO, type=str, choices=['a2c', 'ppo', 'sac', 'td3', 'ddpg'], help='RL agent (default: ppo)', metavar='') # parser.add_argument('--obs', default=DEFAULT_OBS, type=ObservationType, help='Observation space (default: kin)', metavar='') # parser.add_argument('--act', default=DEFAULT_ACT, type=ActionType, 
help='Action space (default: one_d_rpm)', metavar='') -# parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='') -# parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='') +# parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='') +# parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='') # parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='') # ARGS = parser.parse_args()
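
For reference, a minimal usage sketch of the environments consolidated by this patch (not part of the diff itself): it follows the post-patch registrations in gym_pybullet_drones/__init__.py and the Gymnasium-style 5-tuple step() already used in examples/learn.py; the reset() return convention and the space shapes noted in the comments are assumptions inferred from BaseRLAviary, not guaranteed by this patch.

import gymnasium as gym

import gym_pybullet_drones  # importing the package registers 'hover-aviary-v0' and 'leaderfollower-aviary-v0'
from gym_pybullet_drones.envs.LeaderFollowerAviary import LeaderFollowerAviary
from gym_pybullet_drones.utils.enums import ObservationType, ActionType

#### Single-agent case through the registry, as in examples/learn.py ########
env = gym.make('hover-aviary-v0')
print('[INFO] Action space:', env.action_space)            # expected Box(-1, 1, (1, 4)) for RPM actions
print('[INFO] Observation space:', env.observation_space)  # expected Box(..., (1, 12)) for KIN observations

obs, info = env.reset()
for _ in range(10):
    action = env.action_space.sample()  # random normalized action, rescaled to RPMs by _preprocessAction()
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()
env.close()

#### Multi-agent case, instantiating the class directly #####################
multi_env = LeaderFollowerAviary(num_drones=2,
                                 obs=ObservationType.KIN,
                                 act=ActionType.RPM
                                 )
print('[INFO] Multi-agent action space:', multi_env.action_space)  # expected Box(-1, 1, (2, 4))
multi_env.close()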