diff --git a/.gitignore b/.gitignore
index 98af37ec3..f24b20352 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,6 @@ examples/debug.py
 examples/test.py
 # NumPy saves and videos
-files/logs/*.npy
-files/videos/*.mp4
 results/
 tmp/
diff --git a/README.md b/README.md
index c3829e2a9..8ae17360a 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ This is a minimalist refactoring of the original `gym-pybullet-drones` repositor
 > **NOTE**: if you prefer to access the original codebase, presented at IROS in 2021, please `git checkout [paper|master]` after cloning the repo, and refer to the corresponding `README.md`'s.
-formation flight control info
+formation flight control info
 ## Installation
@@ -24,18 +24,27 @@ pip3 install -e . # if needed, `sudo apt install build-essential` to install `gc
 ## Use
-### PID position control example
+### PID control examples
 ```sh
 cd gym_pybullet_drones/examples/
-python3 pid.py
+python3 pid.py # position and velocity reference
+python3 pid_velocity.py # desired velocity reference
 ```
-### Stable-baselines3 PPO RL example
+### Downwash effect example
 ```sh
 cd gym_pybullet_drones/examples/
-python3 learn.py
+python3 downwash.py
+```
+
+### Stable-baselines3 PPO RL 3'-training examples
+
+```sh
+cd gym_pybullet_drones/examples/
+python learn.py # task: single drone hover at z == 1
+python learn.py --multiagent true # task: 2-drone hover at z == 1.2 and 0.7
 ```
 ### Betaflight SITL example (Ubuntu only)
@@ -96,13 +105,14 @@ If you wish, please cite our [IROS 2021 paper](https://arxiv.org/abs/2103.02142)
 - C. Karen Liu and Dan Negrut (2020) [*The Role of Physics-Based Simulators in Robotics*](https://www.annualreviews.org/doi/pdf/10.1146/annurev-control-072220-093055)
 - Yunlong Song, Selim Naji, Elia Kaufmann, Antonio Loquercio, and Davide Scaramuzza (2020) [*Flightmare: A Flexible Quadrotor Simulator*](https://arxiv.org/pdf/2009.00563.pdf)
-## TODO
+## Core Team WIP
+
+- [ ] Multi-drone `crazyflie-firmware` SITL support (@spencerteetaert, @JacopoPan)
+
+## Desired Contributions/PRs
-- [ ] Add `crazyflie-firmware` SITL support @spencerteetaert
-- [ ] Add motor delay @JacopoPan / @spencerteetaert
-- [ ] Replace `rpy` with quaternions (and `ang_vel` with body rates) in `obs` @JacopoPan
-- [ ] Replace `BaseSingleAgentAviary` and `BaseMultiAgentAviary` with a single `RLAviary`, incl. PR #161 @JacopoPan
-- [ ] Add a multi-agent MDP with 2-drone chase through a gate @JacopoPan
+- [ ] Add motor delay by implementing a buffer in `BaseAviary._dynamics()`
+- [ ] Replace `rpy` with quaternions (and `ang_vel` with body rates) by editing `BaseAviary._updateAndStoreKinematicInformation()`, `BaseAviary._getDroneStateVector()`, and the `.computeObs()` methods of relevant subclasses
 -----
 > University of Toronto's [Dynamic Systems Lab](https://github.com/utiasDSL) / [Vector Institute](https://github.com/VectorInstitute) / University of Cambridge's [Prorok Lab](https://github.com/proroklab)
diff --git a/files/logs/foo.txt b/files/logs/foo.txt
deleted file mode 100644
index e69de29bb..000000000
diff --git a/files/readme_images/2020.gif b/files/readme_images/2020.gif
deleted file mode 100644
index 92bc0fe11..000000000
Binary files a/files/readme_images/2020.gif and /dev/null differ
diff --git a/files/readme_images/dep.gif b/files/readme_images/dep.gif
deleted file mode 100644
index ff67941dd..000000000
Binary files a/files/readme_images/dep.gif and /dev/null differ
diff --git a/files/readme_images/downwash.gif b/files/readme_images/downwash.gif
deleted file mode 100644
index 3ed4372f4..000000000
Binary files a/files/readme_images/downwash.gif and /dev/null differ
diff --git a/files/readme_images/downwash.png b/files/readme_images/downwash.png
deleted file mode 100644
index 34e19a8d2..000000000
Binary files a/files/readme_images/downwash.png and /dev/null differ
diff --git a/files/readme_images/rgb.gif b/files/readme_images/rgb.gif
deleted file mode 100644
index 26c2017cd..000000000
Binary files a/files/readme_images/rgb.gif and /dev/null differ
diff --git a/files/readme_images/seg.gif b/files/readme_images/seg.gif
deleted file mode 100644
index 9b4e0ccae..000000000
Binary files a/files/readme_images/seg.gif and /dev/null differ
diff --git a/gym_pybullet_drones/__init__.py b/gym_pybullet_drones/__init__.py
index 365cbfaab..db2933aa1 100644
--- a/gym_pybullet_drones/__init__.py
+++ b/gym_pybullet_drones/__init__.py
@@ -12,20 +12,10 @@ register(
     id='hover-aviary-v0',
-    entry_point='gym_pybullet_drones.envs.single_agent_rl:HoverAviary',
+    entry_point='gym_pybullet_drones.envs:HoverAviary',
 )
 register(
-    id='flythrugate-aviary-v0',
-    entry_point='gym_pybullet_drones.envs.single_agent_rl:FlyThruGateAviary',
-)
-
-register(
-    id='flock-aviary-v0',
-    entry_point='gym_pybullet_drones.envs.multi_agent_rl:FlockAviary',
-)
-
-register(
-    id='leaderfollower-aviary-v0',
-    entry_point='gym_pybullet_drones.envs.multi_agent_rl:LeaderFollowerAviary',
+    id='multihover-aviary-v0',
+    entry_point='gym_pybullet_drones.envs:MultiHoverAviary',
 )
diff --git a/gym_pybullet_drones/assets/beta.txt b/gym_pybullet_drones/assets/beta-presets-bak.txt
similarity index 100%
rename from gym_pybullet_drones/assets/beta.txt
rename to gym_pybullet_drones/assets/beta-presets-bak.txt
diff --git a/gym_pybullet_drones/assets/beta.csv b/gym_pybullet_drones/assets/beta-traj.csv
similarity index 100%
rename from gym_pybullet_drones/assets/beta.csv
rename to gym_pybullet_drones/assets/beta-traj.csv
diff --git a/gym_pybullet_drones/assets/clone_bfs.sh b/gym_pybullet_drones/assets/clone_bfs.sh
index 454c80c5c..aeb2cafed 100755
--- a/gym_pybullet_drones/assets/clone_bfs.sh
+++ b/gym_pybullet_drones/assets/clone_bfs.sh
@@ -18,7 +18,7 @@ cd ../../
 mkdir betaflight_sitl/
 cd betaflight_sitl/
-# Step 1: Clone and open betaflight's source:
+# Step 1: Clone and open betaflight's source (at the time of writing, branch `master`, future
release 4.5)): git clone https://github.com/betaflight/betaflight temp/ @@ -27,8 +27,6 @@ git clone https://github.com/betaflight/betaflight temp/ # from Betaflight's `SIMULATOR_BUILD` cd temp/ sed -i "s/delayMicroseconds_real(50);/\/\/delayMicroseconds_real(50);/g" ./src/main/main.c -sed -i "s/ret = udpInit(\&stateLink, NULL, 9003, true);/\/\/ret = udpInit(\&stateLink, NULL, PORT_STATE, true);/g" ./src/main/target/SITL/sitl.c -sed -i "s/printf(\"start UDP server.../\/\/printf(\"start UDP server.../g" ./src/main/target/SITL/sitl.c # Prepare make arm_sdk_install @@ -47,7 +45,7 @@ for ((i = 0; i < desired_max_num_drones; i++)); do cp -r temp/ "bf${i}/" cd "bf${i}/" - # Step 3: Change the UDP ports used by each Betaflight SITL instancet + # Step 3: Change the UDP ports used by each Betaflight SITL instance replacement1="PORT_PWM_RAW 90${i}1" sed -i "s/$pattern1/$replacement1/g" ./src/main/target/SITL/sitl.c replacement2="PORT_PWM 90${i}2" diff --git a/files/videos/ffmpeg_png2mp4.sh b/gym_pybullet_drones/assets/ffmpeg_png2mp4.sh similarity index 100% rename from files/videos/ffmpeg_png2mp4.sh rename to gym_pybullet_drones/assets/ffmpeg_png2mp4.sh diff --git a/files/readme_images/helix.gif b/gym_pybullet_drones/assets/helix.gif similarity index 100% rename from files/readme_images/helix.gif rename to gym_pybullet_drones/assets/helix.gif diff --git a/files/readme_images/helix.png b/gym_pybullet_drones/assets/helix.png similarity index 100% rename from files/readme_images/helix.png rename to gym_pybullet_drones/assets/helix.png diff --git a/gym_pybullet_drones/envs/BaseAviary.py b/gym_pybullet_drones/envs/BaseAviary.py index 5d9a3a39a..8de25c03c 100755 --- a/gym_pybullet_drones/envs/BaseAviary.py +++ b/gym_pybullet_drones/envs/BaseAviary.py @@ -61,7 +61,7 @@ def __init__(self, gui : bool, optional Whether to use PyBullet's GUI. record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. + Whether to save a video of the simulation. obstacles : bool, optional Whether to add obstacles to the simulation. user_debug_gui : bool, optional @@ -338,7 +338,6 @@ def step(self, ) for i in range(self.NUM_DRONES)] #### Save, preprocess, and clip the action to the max. RPM # else: - self._saveLastAction(action) clipped_action = np.reshape(self._preprocessAction(action), (self.NUM_DRONES, 4)) #### Repeat for as many as the aggregate physics steps ##### for _ in range(self.PYB_STEPS_PER_CTRL): @@ -466,7 +465,6 @@ def _housekeeping(self): self.GUI_INPUT_TEXT = -1*np.ones(self.NUM_DRONES) self.USE_GUI_RPM=False self.last_input_switch = 0 - self.last_action = -1*np.ones((self.NUM_DRONES, 4)) self.last_clipped_action = np.zeros((self.NUM_DRONES, 4)) self.gui_input = np.zeros(4) #### Initialize the drones kinemaatic information ########## @@ -916,23 +914,6 @@ def _normalizedActionToRPM(self, ################################################################################ - def _saveLastAction(self, - action - ): - """Stores the most recent action into attribute `self.last_action`. - - The last action can be used to compute aerodynamic effects. - - Parameters - ---------- - action : ndarray - Ndarray containing the current RPMs input for each drone. 
- - """ - self.last_action = np.reshape(action, (self.NUM_DRONES, 4)) - - ################################################################################ - def _showDroneLocalAxes(self, nth_drone ): diff --git a/gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py b/gym_pybullet_drones/envs/BaseRLAviary.py similarity index 81% rename from gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py rename to gym_pybullet_drones/envs/BaseRLAviary.py index 40415449a..e6988e5df 100644 --- a/gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py +++ b/gym_pybullet_drones/envs/BaseRLAviary.py @@ -2,19 +2,20 @@ import numpy as np import pybullet as p from gymnasium import spaces +from collections import deque from gym_pybullet_drones.envs.BaseAviary import BaseAviary from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType, ImageType from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl -class BaseMultiagentAviary(BaseAviary): - """Base multi-agent environment class for reinforcement learning.""" +class BaseRLAviary(BaseAviary): + """Base single and multi-agent environment class for reinforcement learning.""" ################################################################################ def __init__(self, drone_model: DroneModel=DroneModel.CF2X, - num_drones: int=2, + num_drones: int=1, neighbourhood_radius: float=np.inf, initial_xyzs=None, initial_rpys=None, @@ -26,7 +27,7 @@ def __init__(self, obs: ObservationType=ObservationType.KIN, act: ActionType=ActionType.RPM ): - """Initialization of a generic multi-agent RL environment. + """Initialization of a generic single and multi-agent RL environment. Attributes `vision_attributes` and `dynamics_attributes` are selected based on the choice of `obs` and `act`; `obstacles` is set to True @@ -54,27 +55,27 @@ def __init__(self, gui : bool, optional Whether to use PyBullet's GUI. record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. + Whether to save a video of the simulation. obs : ObservationType, optional The type of observation space (kinematic information or vision) act : ActionType, optional The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.) 
""" - if num_drones < 2: - print("[ERROR] in BaseMultiagentAviary.__init__(), num_drones should be >= 2") - exit() + #### Create a buffer for the last 10 actions ############### + self.ACTION_BUFFER_SIZE = 10 + self.action_buffer = deque(maxlen=self.ACTION_BUFFER_SIZE) + #### vision_attributes = True if obs == ObservationType.RGB else False self.OBS_TYPE = obs self.ACT_TYPE = act - self.EPISODE_LEN_SEC = 5 #### Create integrated controllers ######################### if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID]: os.environ['KMP_DUPLICATE_LIB_OK']='True' if drone_model in [DroneModel.CF2X, DroneModel.CF2P]: self.ctrl = [DSLPIDControl(drone_model=DroneModel.CF2X) for i in range(num_drones)] else: - print("[ERROR] in BaseMultiagentAviary.__init()__, no controller is available for the specified drone_model") + print("[ERROR] in BaseRLAviary.__init()__, no controller is available for the specified drone_model") super().__init__(drone_model=drone_model, num_drones=num_drones, neighbourhood_radius=neighbourhood_radius, @@ -144,10 +145,14 @@ def _actionSpace(self): elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]: size = 1 else: - print("[ERROR] in BaseMultiagentAviary._actionSpace()") + print("[ERROR] in BaseRLAviary._actionSpace()") exit() act_lower_bound = np.array([-1*np.ones(size) for i in range(self.NUM_DRONES)]) act_upper_bound = np.array([+1*np.ones(size) for i in range(self.NUM_DRONES)]) + # + for i in range(self.ACTION_BUFFER_SIZE): + self.action_buffer.append(np.zeros((self.NUM_DRONES,size))) + # return spaces.Box(low=act_lower_bound, high=act_upper_bound, dtype=np.float32) ################################################################################ @@ -179,6 +184,7 @@ def _preprocessAction(self, commanded to the 4 motors of each drone. 
""" + self.action_buffer.append(action) rpm = np.zeros((self.NUM_DRONES,4)) for k in range(action.shape[0]): target = action[k, :] @@ -219,16 +225,16 @@ def _preprocessAction(self, rpm[k,:] = np.repeat(self.HOVER_RPM * (1+0.05*target), 4) elif self.ACT_TYPE == ActionType.ONE_D_PID: state = self._getDroneStateVector(k) - rpm, _, _ = self.ctrl[k].computeControl(control_timestep=self.CTRL_TIMESTEP, + res, _, _ = self.ctrl[k].computeControl(control_timestep=self.CTRL_TIMESTEP, cur_pos=state[0:3], cur_quat=state[3:7], cur_vel=state[10:13], cur_ang_vel=state[13:16], target_pos=state[0:3]+0.1*np.array([0,0,target[0]]) ) - rpm[k,:] = rpm + rpm[k,:] = res else: - print("[ERROR] in BaseMultiagentAviary._preprocessAction()") + print("[ERROR] in BaseRLAviary._preprocessAction()") exit() return rpm @@ -244,28 +250,34 @@ def _observationSpace(self): """ if self.OBS_TYPE == ObservationType.RGB: - # return spaces.Dict({i: spaces.Box(low=0, - # high=255, - # shape=(self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8 - # ) for i in range(self.NUM_DRONES)}) return spaces.Box(low=0, high=255, shape=(self.NUM_DRONES, self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8) elif self.OBS_TYPE == ObservationType.KIN: - ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3 - # obs_lower_bound = np.array([-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]) - # obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) - # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 ) ############################################################ #### OBS SPACE OF SIZE 12 - obs_lower_bound = np.array([[-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1] for i in range(self.NUM_DRONES)]) - obs_upper_bound = np.array([[1,1,1, 1,1,1, 1,1,1, 1,1,1] for i in range(self.NUM_DRONES)]) + #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ + lo = -np.inf + hi = np.inf + obs_lower_bound = np.array([[lo,lo,0, lo,lo,lo,lo,lo,lo,lo,lo,lo] for i in range(self.NUM_DRONES)]) + obs_upper_bound = np.array([[hi,hi,hi,hi,hi,hi,hi,hi,hi,hi,hi,hi] for i in range(self.NUM_DRONES)]) + #### Add action buffer to observation space ################ + act_lo = -1 + act_hi = +1 + for i in range(self.ACTION_BUFFER_SIZE): + if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]: + obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])]) + obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])]) + elif self.ACT_TYPE==ActionType.PID: + obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])]) + obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])]) + elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]: + obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo] for i in range(self.NUM_DRONES)])]) + obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi] for i in range(self.NUM_DRONES)])]) return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32) ############################################################ else: - print("[ERROR] in BaseMultiagentAviary._observationSpace()") + print("[ERROR] in BaseRLAviary._observationSpace()") 
################################################################################ @@ -291,35 +303,20 @@ def _computeObs(self): path=self.ONBOARD_IMG_PATH+"drone_"+str(i), frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ) ) - return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]) + return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]).astype('float32') elif self.OBS_TYPE == ObservationType.KIN: - ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - # return { i : self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) } ############################################################ #### OBS SPACE OF SIZE 12 obs_12 = np.zeros((self.NUM_DRONES,12)) for i in range(self.NUM_DRONES): - obs = self._clipAndNormalizeState(self._getDroneStateVector(i)) + #obs = self._clipAndNormalizeState(self._getDroneStateVector(i)) + obs = self._getDroneStateVector(i) obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,) - return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]) + ret = np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32') + #### Add action buffer to observation ####################### + for i in range(self.ACTION_BUFFER_SIZE): + ret = np.hstack([ret, np.array([self.action_buffer[i][j, :] for j in range(self.NUM_DRONES)])]) + return ret ############################################################ else: - print("[ERROR] in BaseMultiagentAviary._computeObs()") - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Must be implemented in a subclass. - - Parameters - ---------- - state : ndarray - Array containing the non-normalized state of a single drone. - - """ - raise NotImplementedError + print("[ERROR] in BaseRLAviary._computeObs()") diff --git a/gym_pybullet_drones/envs/BetaAviary.py b/gym_pybullet_drones/envs/BetaAviary.py index 6cf6caee6..e4d37a2e4 100644 --- a/gym_pybullet_drones/envs/BetaAviary.py +++ b/gym_pybullet_drones/envs/BetaAviary.py @@ -59,7 +59,7 @@ def __init__(self, gui : bool, optional Whether to use PyBullet's GUI. record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. + Whether to save a video of the simulation. obstacles : bool, optional Whether to add obstacles to the simulation. user_debug_gui : bool, optional diff --git a/gym_pybullet_drones/envs/CtrlAviary.py b/gym_pybullet_drones/envs/CtrlAviary.py index 3dc127217..b3ba8efee 100644 --- a/gym_pybullet_drones/envs/CtrlAviary.py +++ b/gym_pybullet_drones/envs/CtrlAviary.py @@ -47,7 +47,7 @@ def __init__(self, gui : bool, optional Whether to use PyBullet's GUI. record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. + Whether to save a video of the simulation. obstacles : bool, optional Whether to add obstacles to the simulation. 
user_debug_gui : bool, optional diff --git a/gym_pybullet_drones/envs/HoverAviary.py b/gym_pybullet_drones/envs/HoverAviary.py new file mode 100644 index 000000000..165b35c32 --- /dev/null +++ b/gym_pybullet_drones/envs/HoverAviary.py @@ -0,0 +1,127 @@ +import numpy as np + +from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary +from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType + +class HoverAviary(BaseRLAviary): + """Single agent RL problem: hover at position.""" + + ################################################################################ + + def __init__(self, + drone_model: DroneModel=DroneModel.CF2X, + initial_xyzs=None, + initial_rpys=None, + physics: Physics=Physics.PYB, + pyb_freq: int = 240, + ctrl_freq: int = 30, + gui=False, + record=False, + obs: ObservationType=ObservationType.KIN, + act: ActionType=ActionType.RPM + ): + """Initialization of a single agent RL environment. + + Using the generic single agent RL superclass. + + Parameters + ---------- + drone_model : DroneModel, optional + The desired drone type (detailed in an .urdf file in folder `assets`). + initial_xyzs: ndarray | None, optional + (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. + initial_rpys: ndarray | None, optional + (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). + physics : Physics, optional + The desired implementation of PyBullet physics/custom dynamics. + pyb_freq : int, optional + The frequency at which PyBullet steps (a multiple of ctrl_freq). + ctrl_freq : int, optional + The frequency at which the environment steps. + gui : bool, optional + Whether to use PyBullet's GUI. + record : bool, optional + Whether to save a video of the simulation. + obs : ObservationType, optional + The type of observation space (kinematic information or vision) + act : ActionType, optional + The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) + + """ + self.TARGET_POS = np.array([0,0,1]) + self.EPISODE_LEN_SEC = 8 + super().__init__(drone_model=drone_model, + num_drones=1, + initial_xyzs=initial_xyzs, + initial_rpys=initial_rpys, + physics=physics, + pyb_freq=pyb_freq, + ctrl_freq=ctrl_freq, + gui=gui, + record=record, + obs=obs, + act=act + ) + + ################################################################################ + + def _computeReward(self): + """Computes the current reward value. + + Returns + ------- + float + The reward. + + """ + state = self._getDroneStateVector(0) + ret = max(0, 2 - np.linalg.norm(self.TARGET_POS-state[0:3])**4) + return ret + + ################################################################################ + + def _computeTerminated(self): + """Computes the current done value. + + Returns + ------- + bool + Whether the current episode is done. + + """ + state = self._getDroneStateVector(0) + if np.linalg.norm(self.TARGET_POS-state[0:3]) < .0001: + return True + else: + return False + + ################################################################################ + + def _computeTruncated(self): + """Computes the current truncated value. + + Returns + ------- + bool + Whether the current episode timed out. + + """ + if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC: + return True + else: + return False + + ################################################################################ + + def _computeInfo(self): + """Computes the current info dict(s). + + Unused. 
+ + Returns + ------- + dict[str, int] + Dummy value. + + """ + return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years diff --git a/gym_pybullet_drones/envs/MultiHoverAviary.py b/gym_pybullet_drones/envs/MultiHoverAviary.py new file mode 100644 index 000000000..f1616cf5f --- /dev/null +++ b/gym_pybullet_drones/envs/MultiHoverAviary.py @@ -0,0 +1,140 @@ +import numpy as np + +from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary +from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType + +class MultiHoverAviary(BaseRLAviary): + """Multi-agent RL problem: leader-follower.""" + + ################################################################################ + + def __init__(self, + drone_model: DroneModel=DroneModel.CF2X, + num_drones: int=2, + neighbourhood_radius: float=np.inf, + initial_xyzs=None, + initial_rpys=None, + physics: Physics=Physics.PYB, + pyb_freq: int = 240, + ctrl_freq: int = 30, + gui=False, + record=False, + obs: ObservationType=ObservationType.KIN, + act: ActionType=ActionType.RPM + ): + """Initialization of a multi-agent RL environment. + + Using the generic multi-agent RL superclass. + + Parameters + ---------- + drone_model : DroneModel, optional + The desired drone type (detailed in an .urdf file in folder `assets`). + num_drones : int, optional + The desired number of drones in the aviary. + neighbourhood_radius : float, optional + Radius used to compute the drones' adjacency matrix, in meters. + initial_xyzs: ndarray | None, optional + (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. + initial_rpys: ndarray | None, optional + (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). + physics : Physics, optional + The desired implementation of PyBullet physics/custom dynamics. + pyb_freq : int, optional + The frequency at which PyBullet steps (a multiple of ctrl_freq). + ctrl_freq : int, optional + The frequency at which the environment steps. + gui : bool, optional + Whether to use PyBullet's GUI. + record : bool, optional + Whether to save a video of the simulation. + obs : ObservationType, optional + The type of observation space (kinematic information or vision) + act : ActionType, optional + The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) + + """ + self.EPISODE_LEN_SEC = 8 + super().__init__(drone_model=drone_model, + num_drones=num_drones, + neighbourhood_radius=neighbourhood_radius, + initial_xyzs=initial_xyzs, + initial_rpys=initial_rpys, + physics=physics, + pyb_freq=pyb_freq, + ctrl_freq=ctrl_freq, + gui=gui, + record=record, + obs=obs, + act=act + ) + + self.TARGET_POS = self.INIT_XYZS + np.array([[0,0,1/(i+1)] for i in range(num_drones)]) + + ################################################################################ + + def _computeReward(self): + """Computes the current reward value. + + Returns + ------- + float + The reward. + + """ + states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)]) + ret = 0 + for i in range(self.NUM_DRONES): + ret += max(0, 2 - np.linalg.norm(self.TARGET_POS[i,:]-states[i][0:3])**4) + return ret + + ################################################################################ + + def _computeTerminated(self): + """Computes the current done value. + + Returns + ------- + bool + Whether the current episode is done. 
+ + """ + states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)]) + dist = 0 + for i in range(self.NUM_DRONES): + dist += np.linalg.norm(self.TARGET_POS[i,:]-states[i][0:3]) + if dist < .0001: + return True + else: + return False + + ################################################################################ + + def _computeTruncated(self): + """Computes the current truncated value. + + Returns + ------- + bool + Whether the current episode timed out. + + """ + if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC: + return True + else: + return False + + ################################################################################ + + def _computeInfo(self): + """Computes the current info dict(s). + + Unused. + + Returns + ------- + dict[str, int] + Dummy value. + + """ + return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years diff --git a/gym_pybullet_drones/envs/RLAviary.py b/gym_pybullet_drones/envs/RLAviary.py deleted file mode 100644 index bbf5bdb4b..000000000 --- a/gym_pybullet_drones/envs/RLAviary.py +++ /dev/null @@ -1,2 +0,0 @@ -'''TBD -''' diff --git a/gym_pybullet_drones/envs/VelocityAviary.py b/gym_pybullet_drones/envs/VelocityAviary.py index c522c80df..56416ef8b 100644 --- a/gym_pybullet_drones/envs/VelocityAviary.py +++ b/gym_pybullet_drones/envs/VelocityAviary.py @@ -49,7 +49,7 @@ def __init__(self, gui : bool, optional Whether to use PyBullet's GUI. record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. + Whether to save a video of the simulation. obstacles : bool, optional Whether to add obstacles to the simulation. user_debug_gui : bool, optional diff --git a/gym_pybullet_drones/envs/__init__.py b/gym_pybullet_drones/envs/__init__.py index d630927c1..fc27b99d2 100644 --- a/gym_pybullet_drones/envs/__init__.py +++ b/gym_pybullet_drones/envs/__init__.py @@ -1,2 +1,5 @@ +from gym_pybullet_drones.envs.BetaAviary import BetaAviary from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary +from gym_pybullet_drones.envs.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.MultiHoverAviary import MultiHoverAviary from gym_pybullet_drones.envs.VelocityAviary import VelocityAviary diff --git a/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py b/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py deleted file mode 100644 index b28f8accd..000000000 --- a/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py +++ /dev/null @@ -1,265 +0,0 @@ -import math -import numpy as np - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary - -class FlockAviary(BaseMultiagentAviary): - """Multi-agent RL problem: flocking.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - num_drones: int=2, - neighbourhood_radius: float=np.inf, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM): - """Initialization of a multi-agent RL environment. - - Using the generic multi-agent RL superclass. 
- - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - num_drones : int, optional - The desired number of drones in the aviary. - neighbourhood_radius : float, optional - Radius used to compute the drones' adjacency matrix, in meters. - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) - - """ - super().__init__(drone_model=drone_model, - num_drones=num_drones, - neighbourhood_radius=neighbourhood_radius, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obs=obs, - act=act - ) - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value(s). - - Returns - ------- - dict[int, float] - The reward value for each drone. - - """ - rewards = {} - states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)]) - rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 1]) - states[0, 0:3])**2 - for i in range(1, self.NUM_DRONES): - rewards[i] = -1 * np.linalg.norm(states[i-1, 2] - states[i, 2])**2 - return rewards - """ - # obs here is dictionary of the form {"i":{"state": Box(20,), "neighbors": MultiBinary(NUM_DRONES)}} - # parse velocity and position - vel = np.zeros((1, self.NUM_DRONES, 3)); pos = np.zeros((1, self.NUM_DRONES, 3)) - for i in range(self.NUM_DRONES): - pos[0,i,:] = obs[ i ]["state"][0:3] - vel[0,i,:] = obs[ i ]["state"][10:13] - # compute metrics - # velocity alignment - ali = 0 - EPSILON = 1e-3 # avoid divide by zero - linear_vel_norm = np.linalg.norm(vel, axis=2) - for i in range(self.NUM_DRONES): - for j in range(self.NUM_DRONES): - if j != i: - d = np.einsum('ij,ij->i', vel[:, i, :], vel[:, j, :]) - ali += (d / (linear_vel_norm[:, i] + EPSILON) / (linear_vel_norm[:, j] + EPSILON)) - ali /= (self.NUM_DRONES * (self.NUM_DRONES - 1)) - # flocking speed - cof_v = np.mean(vel, axis=1) # center of flock speed - avg_flock_linear_speed = np.linalg.norm(cof_v, axis=-1) - # spacing - whole_flock_spacing = [] - for i in range(self.NUM_DRONES): - flck_neighbor_pos = np.delete(pos, [i], 1) - drone_neighbor_pos_diff = flck_neighbor_pos - np.reshape(pos[:, i, :], (pos[:, i, :].shape[0], 1, -1)) - drone_neighbor_dis = np.linalg.norm(drone_neighbor_pos_diff, axis=-1) - drone_spacing = np.amin(drone_neighbor_dis, axis=-1) - whole_flock_spacing.append(drone_spacing) - whole_flock_spacing = np.stack(whole_flock_spacing, axis=-1) - avg_flock_spacing = np.mean(whole_flock_spacing, axis=-1) - var_flock_spacing = np.var(whole_flock_spacing, axis=-1) - # flocking metrics - 
FLOCK_SPACING_MIN = 1.0; FLOCK_SPACING_MAX = 3.0 - if FLOCK_SPACING_MIN < avg_flock_spacing[0] < FLOCK_SPACING_MAX: - avg_flock_spac_rew = 0.0 - else: - avg_flock_spac_rew = min(math.fabs(avg_flock_spacing[0] - FLOCK_SPACING_MIN), - math.fabs(avg_flock_spacing[0] - FLOCK_SPACING_MAX)) - reward = ali[0] + avg_flock_linear_speed[0] - avg_flock_spac_rew - var_flock_spacing[0] - return { i : reward for i in range(self.NUM_DRONES) } - """ - - ################################################################################ - - def _computeTerminated(self): - """Computes the current done value(s). - - Returns - ------- - dict[int | "__all__", bool] - Dictionary with the done value of each drone and - one additional boolean value for key "__all__". - - """ - bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False - done = {i: bool_val for i in range(self.NUM_DRONES)} - done["__all__"] = True if True in done.values() else False - return done - - ################################################################################ - - def _computeTruncated(self): - """Computes the current truncated value(s). - - Unused in this implementation. - - Returns - ------- - bool - Always false. - - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused. - - Returns - ------- - dict[int, dict[]] - Dictionary of empty dictionaries. - - """ - return {i: {} for i in range(self.NUM_DRONES)} - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. 
- - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. - - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12])) diff --git a/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py b/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py deleted file mode 100644 index 4722ac2aa..000000000 --- a/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py +++ /dev/null @@ -1,223 +0,0 @@ -import numpy as np - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary - -class LeaderFollowerAviary(BaseMultiagentAviary): - """Multi-agent RL problem: leader-follower.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - num_drones: int=2, - neighbourhood_radius: 
float=np.inf, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM): - """Initialization of a multi-agent RL environment. - - Using the generic multi-agent RL superclass. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - num_drones : int, optional - The desired number of drones in the aviary. - neighbourhood_radius : float, optional - Radius used to compute the drones' adjacency matrix, in meters. - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) - - """ - super().__init__(drone_model=drone_model, - num_drones=num_drones, - neighbourhood_radius=neighbourhood_radius, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obs=obs, - act=act - ) - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value(s). - - Returns - ------- - dict[int, float] - The reward value for each drone. - - """ - rewards = {} - states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)]) - rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 0.5]) - states[0, 0:3])**2 - # rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD - for i in range(1, self.NUM_DRONES): - rewards[i] = -(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2 - return rewards - - ################################################################################ - - def _computeTerminated(self): - """Computes the current done value(s). - - Returns - ------- - dict[int | "__all__", bool] - Dictionary with the done value of each drone and - one additional boolean value for key "__all__". - - """ - bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False - done = {i: bool_val for i in range(self.NUM_DRONES)} - done["__all__"] = bool_val # True if True in done.values() else False - return done - - ################################################################################ - - def _computeTruncated(self): - """Computes the current truncated value(s). - - Unused in this implementation. - - Returns - ------- - bool - Always false. 
- - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused. - - Returns - ------- - dict[int, dict[]] - Dictionary of empty dictionaries. - - """ - return {i: {} for i in range(self.NUM_DRONES)} - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. - - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. 
- - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12])) diff --git a/gym_pybullet_drones/envs/multi_agent_rl/__init__.py b/gym_pybullet_drones/envs/multi_agent_rl/__init__.py deleted file mode 100644 index 09197c8d5..000000000 --- a/gym_pybullet_drones/envs/multi_agent_rl/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary -from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary -from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary diff --git a/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py b/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py deleted file mode 100644 index 5b4745cb3..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py +++ /dev/null @@ -1,305 +0,0 @@ -import os -from enum import Enum -import numpy as np -from gymnasium import spaces -import pybullet as p - -from gym_pybullet_drones.envs.BaseAviary import BaseAviary -from gym_pybullet_drones.utils.enums import DroneModel, Physics, ImageType, ActionType, ObservationType -from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl - - -class BaseSingleAgentAviary(BaseAviary): - """Base single drone environment class for reinforcement learning.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM - ): - """Initialization of a generic single agent RL environment. - - Attribute `num_drones` is automatically set to 1; `vision_attributes` - are selected based on the choice of `obs` and `act`; `obstacles` is - set to True and overridden with landmarks for vision applications; - `user_debug_gui` is set to False for performance. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. 
- pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.) - - """ - vision_attributes = True if obs == ObservationType.RGB else False - self.OBS_TYPE = obs - self.ACT_TYPE = act - self.EPISODE_LEN_SEC = 5 - #### Create integrated controllers ######################### - if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID]: - os.environ['KMP_DUPLICATE_LIB_OK']='True' - if drone_model in [DroneModel.CF2X, DroneModel.CF2P]: - self.ctrl = DSLPIDControl(drone_model=DroneModel.CF2X) - else: - print("[ERROR] in BaseSingleAgentAviary.__init()__, no controller is available for the specified drone_model") - super().__init__(drone_model=drone_model, - num_drones=1, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obstacles=True, # Add obstacles for RGB observations and/or FlyThruGate - user_debug_gui=False, # Remove of RPM sliders from all single agent learning aviaries - vision_attributes=vision_attributes, - ) - #### Set a limit on the maximum target speed ############### - if act == ActionType.VEL: - self.SPEED_LIMIT = 0.03 * self.MAX_SPEED_KMH * (1000/3600) - - ################################################################################ - - def _addObstacles(self): - """Add obstacles to the environment. - - Only if the observation is of type RGB, 4 landmarks are added. - Overrides BaseAviary's method. - - """ - if self.OBS_TYPE == ObservationType.RGB: - p.loadURDF("block.urdf", - [1, 0, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("cube_small.urdf", - [0, 1, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("duck_vhacd.urdf", - [-1, 0, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("teddy_vhacd.urdf", - [0, -1, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - else: - pass - - ################################################################################ - - def _actionSpace(self): - """Returns the action space of the environment. - - Returns - ------- - ndarray - A Box() of size 1, 3, 4, or 6 depending on the action type. - - """ - if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]: - size = 4 - elif self.ACT_TYPE == ActionType.PID: - size = 3 - elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]: - size = 1 - else: - print("[ERROR] in BaseSingleAgentAviary._actionSpace()") - exit() - return spaces.Box(low=-1*np.ones(size), - # return spaces.Box(low=np.zeros(size), # Alternative action space, see PR #32 - high=np.ones(size), - dtype=np.float32 - ) - - ################################################################################ - - def _preprocessAction(self, - action - ): - """Pre-processes the action passed to `.step()` into motors' RPMs. 
- - Parameter `action` is processed differenly for each of the different - action types: `action` can be of length 1, 3, 4, or 6 and represent - RPMs, desired thrust and torques, the next target position to reach - using PID control, a desired velocity vector, new PID coefficients, etc. - - Parameters - ---------- - action : ndarray - The input action for each drone, to be translated into RPMs. - - Returns - ------- - ndarray - (4,)-shaped array of ints containing to clipped RPMs - commanded to the 4 motors of each drone. - - """ - if self.ACT_TYPE == ActionType.RPM: - return np.array(self.HOVER_RPM * (1+0.05*action)) - elif self.ACT_TYPE == ActionType.PID: - state = self._getDroneStateVector(0) - next_pos = self._calculateNextStep( - current_position=state[0:3], - destination=action, - step_size=1, - ) - rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP, - cur_pos=state[0:3], - cur_quat=state[3:7], - cur_vel=state[10:13], - cur_ang_vel=state[13:16], - target_pos=next_pos - ) - return rpm - elif self.ACT_TYPE == ActionType.VEL: - state = self._getDroneStateVector(0) - if np.linalg.norm(action[0:3]) != 0: - v_unit_vector = action[0:3] / np.linalg.norm(action[0:3]) - else: - v_unit_vector = np.zeros(3) - rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP, - cur_pos=state[0:3], - cur_quat=state[3:7], - cur_vel=state[10:13], - cur_ang_vel=state[13:16], - target_pos=state[0:3], # same as the current position - target_rpy=np.array([0,0,state[9]]), # keep current yaw - target_vel=self.SPEED_LIMIT * np.abs(action[3]) * v_unit_vector # target the desired velocity vector - ) - return rpm - elif self.ACT_TYPE == ActionType.ONE_D_RPM: - return np.repeat(self.HOVER_RPM * (1+0.05*action), 4) - elif self.ACT_TYPE == ActionType.ONE_D_PID: - state = self._getDroneStateVector(0) - rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP, - cur_pos=state[0:3], - cur_quat=state[3:7], - cur_vel=state[10:13], - cur_ang_vel=state[13:16], - target_pos=state[0:3]+0.1*np.array([0,0,action[0]]) - ) - return rpm - else: - print("[ERROR] in BaseSingleAgentAviary._preprocessAction()") - - ################################################################################ - - def _observationSpace(self): - """Returns the observation space of the environment. - - Returns - ------- - ndarray - A Box() of shape (H,W,4) or (12,) depending on the observation type. 
- - """ - if self.OBS_TYPE == ObservationType.RGB: - return spaces.Box(low=0, - high=255, - shape=(self.IMG_RES[1], self.IMG_RES[0], 4), - dtype=np.uint8 - ) - elif self.OBS_TYPE == ObservationType.KIN: - ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3 - # obs_lower_bound = np.array([-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]) - # obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) - # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 ) - ############################################################ - #### OBS SPACE OF SIZE 12 - return spaces.Box(low=np.array([-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1]), - high=np.array([1,1,1, 1,1,1, 1,1,1, 1,1,1]), - dtype=np.float32 - ) - ############################################################ - else: - print("[ERROR] in BaseSingleAgentAviary._observationSpace()") - - ################################################################################ - - def _computeObs(self): - """Returns the current observation of the environment. - - Returns - ------- - ndarray - A Box() of shape (H,W,4) or (12,) depending on the observation type. - - """ - if self.OBS_TYPE == ObservationType.RGB: - if self.step_counter%self.IMG_CAPTURE_FREQ == 0: - self.rgb[0], self.dep[0], self.seg[0] = self._getDroneImages(0, - segmentation=False - ) - #### Printing observation to PNG frames example ############ - if self.RECORD: - self._exportImage(img_type=ImageType.RGB, - img_input=self.rgb[0], - path=self.ONBOARD_IMG_PATH, - frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ) - ) - return self.rgb[0] - elif self.OBS_TYPE == ObservationType.KIN: - obs = self._clipAndNormalizeState(self._getDroneStateVector(0)) - ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - # return obs - ############################################################ - #### OBS SPACE OF SIZE 12 - ret = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,) - return ret.astype('float32') - ############################################################ - else: - print("[ERROR] in BaseSingleAgentAviary._computeObs()") - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Must be implemented in a subclass. - - Parameters - ---------- - state : ndarray - Array containing the non-normalized state of a single drone. 
- - """ - raise NotImplementedError diff --git a/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py b/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py deleted file mode 100644 index f1b072b20..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py +++ /dev/null @@ -1,240 +0,0 @@ -import os -import numpy as np -import pybullet as p -import pkg_resources - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary - - -class FlyThruGateAviary(BaseSingleAgentAviary): - """Single agent RL problem: fly through a gate.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM - ): - """Initialization of a single agent RL environment. - - Using the generic single agent RL superclass. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) - - """ - super().__init__(drone_model=drone_model, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obs=obs, - act=act - ) - - ################################################################################ - - def _addObstacles(self): - """Add obstacles to the environment. - - Extends the superclass method and add the gate build of cubes and an architrave. - - """ - super()._addObstacles() - p.loadURDF(pkg_resources.resource_filename('gym_pybullet_drones', 'assets/architrave.urdf'), - [0, -1, .55], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - for i in range(10): - p.loadURDF("cube_small.urdf", - [-.3, -1, .02+i*0.05], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("cube_small.urdf", - [.3, -1, .02+i*0.05], - p.getQuaternionFromEuler([0,0,0]), - physicsClientId=self.CLIENT - ) - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value. - - Returns - ------- - float - The reward. 
- - """ - state = self._getDroneStateVector(0) - norm_ep_time = (self.step_counter/self.PYB_FREQ) / self.EPISODE_LEN_SEC - return -10 * np.linalg.norm(np.array([0, -2*norm_ep_time, 0.75])-state[0:3])**2 - - ################################################################################ - - def _computeTerminated(self): - """Computes the current done value. - - Returns - ------- - bool - Whether the current episode is done. - - """ - if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC: - return True - else: - return False - - ################################################################################ - - def _computeTruncated(self): - """Computes the current truncated value(s). - - Unused in this implementation. - - Returns - ------- - bool - Always false. - - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused. - - Returns - ------- - dict[str, int] - Dummy value. - - """ - return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. - - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. 
- - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12])) diff --git a/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py b/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py deleted file mode 100644 index 96fd10c2a..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py +++ /dev/null @@ -1,209 +0,0 @@ -import numpy as np - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary - -class HoverAviary(BaseSingleAgentAviary): - """Single agent RL problem: hover at position.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM - ): - """Initialization of a single agent RL environment. - - Using the generic single agent RL superclass. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) - - """ - super().__init__(drone_model=drone_model, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obs=obs, - act=act - ) - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value. 
- - Returns - ------- - float - The reward. - - """ - state = self._getDroneStateVector(0) - return -1 * np.linalg.norm(np.array([0, 0, 1])-state[0:3])**2 - - ################################################################################ - - def _computeTerminated(self): - """Computes the current done value. - - Returns - ------- - bool - Whether the current episode is done. - - """ - if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC: - return True - else: - return False - - ################################################################################ - - def _computeTruncated(self): - """Computes the current truncated value(s). - - Unused in this implementation. - - Returns - ------- - bool - Always false. - - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused. - - Returns - ------- - dict[str, int] - Dummy value. - - """ - return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. - - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. 
- - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12])) diff --git a/gym_pybullet_drones/envs/single_agent_rl/__init__.py b/gym_pybullet_drones/envs/single_agent_rl/__init__.py deleted file mode 100644 index b50f296b4..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import BaseSingleAgentAviary -from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary -from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary diff --git a/gym_pybullet_drones/examples/beta.py b/gym_pybullet_drones/examples/beta.py index f7da8dd5e..7f47a4841 100644 --- a/gym_pybullet_drones/examples/beta.py +++ b/gym_pybullet_drones/examples/beta.py @@ -88,7 +88,7 @@ def run( ) #### Run the simulation #################################### - with open("../assets/beta.csv", mode='r') as csv_file: + with open("../assets/beta-traj.csv", mode='r') as csv_file: csv_reader = csv.DictReader(csv_file) trajectory1 = iter([{ "pos": np.array([ diff --git a/gym_pybullet_drones/examples/learn.py b/gym_pybullet_drones/examples/learn.py index 81ebf9e90..63809ab73 100644 --- a/gym_pybullet_drones/examples/learn.py +++ b/gym_pybullet_drones/examples/learn.py @@ -1,370 +1,193 @@ -"""Script demonstrating the use of `gym_pybullet_drones`' Gymnasium interface. +"""Script demonstrating the use of `gym_pybullet_drones`'s Gymnasium interface. -Class HoverAviary is used as a learning env for the PPO algorithm. +Classes HoverAviary and LeaderFollowerAviary are used as learning envs for the PPO algorithm. Example ------- In a terminal, run as: - $ python learn.py + $ python learn.py --multiagent false + $ python learn.py --multiagent true Notes ----- This is a minimal working example integrating `gym-pybullet-drones` with -reinforcement learning libraries `stable-baselines3`. +reinforcement learning library `stable-baselines3`. 
""" +import os import time +from datetime import datetime import argparse import gymnasium as gym import numpy as np +import torch from stable_baselines3 import PPO +from stable_baselines3.common.env_util import make_vec_env +from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold +from stable_baselines3.common.evaluation import evaluate_policy from gym_pybullet_drones.utils.Logger import Logger -from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.MultiHoverAviary import MultiHoverAviary from gym_pybullet_drones.utils.utils import sync, str2bool +from gym_pybullet_drones.utils.enums import ObservationType, ActionType DEFAULT_GUI = True DEFAULT_RECORD_VIDEO = False DEFAULT_OUTPUT_FOLDER = 'results' DEFAULT_COLAB = False -def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB, record_video=DEFAULT_RECORD_VIDEO): +DEFAULT_OBS = ObservationType('kin') # 'kin' or 'rgb' +DEFAULT_ACT = ActionType('one_d_rpm') # 'rpm' or 'pid' or 'vel' or 'one_d_rpm' or 'one_d_pid' +DEFAULT_AGENTS = 2 +DEFAULT_MA = False + +def run(multiagent=DEFAULT_MA, output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB, record_video=DEFAULT_RECORD_VIDEO, local=True): + + filename = os.path.join(output_folder, 'save-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S")) + if not os.path.exists(filename): + os.makedirs(filename+'/') + + if not multiagent: + train_env = make_vec_env(HoverAviary, + env_kwargs=dict(obs=DEFAULT_OBS, act=DEFAULT_ACT), + n_envs=1, + seed=0 + ) + eval_env = HoverAviary(obs=DEFAULT_OBS, act=DEFAULT_ACT) + else: + train_env = make_vec_env(MultiHoverAviary, + env_kwargs=dict(num_drones=DEFAULT_AGENTS, obs=DEFAULT_OBS, act=DEFAULT_ACT), + n_envs=1, + seed=0 + ) + eval_env = MultiHoverAviary(num_drones=DEFAULT_AGENTS, obs=DEFAULT_OBS, act=DEFAULT_ACT) #### Check the environment's spaces ######################## - env = gym.make("hover-aviary-v0") - print("[INFO] Action space:", env.action_space) - print("[INFO] Observation space:", env.observation_space) + print('[INFO] Action space:', train_env.action_space) + print('[INFO] Observation space:', train_env.observation_space) #### Train the model ####################################### - model = PPO("MlpPolicy", - env, - verbose=1 - ) - model.learn(total_timesteps=10000) # Typically not enough + model = PPO('MlpPolicy', + train_env, + # policy_kwargs=dict(activation_fn=torch.nn.ReLU, net_arch=[512, 512, dict(vf=[256, 128], pi=[256, 128])]), + # tensorboard_log=filename+'/tb/', + verbose=1) + + callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=np.inf, + verbose=1) + eval_callback = EvalCallback(eval_env, + callback_on_new_best=callback_on_best, + verbose=1, + best_model_save_path=filename+'/', + log_path=filename+'/', + eval_freq=int(2000), + deterministic=True, + render=False) + model.learn(total_timesteps=3*int(1e5) if local else int(1e2), # shorter training in GitHub Actions pytest + callback=eval_callback, + log_interval=100) + + #### Save the model ######################################## + model.save(filename+'/success_model.zip') + print(filename) + + #### Print training progression ############################ + with np.load(filename+'/evaluations.npz') as data: + for j in range(data['timesteps'].shape[0]): + print(str(data['timesteps'][j])+","+str(data['results'][j][0])) + + 
############################################################ + ############################################################ + ############################################################ + ############################################################ + ############################################################ + + if os.path.isfile(filename+'/success_model.zip'): + path = filename+'/success_model.zip' + elif os.path.isfile(filename+'/best_model.zip'): + path = filename+'/best_model.zip' + else: + print("[ERROR]: no model under the specified path", filename) + model = PPO.load(path) #### Show (and record a video of) the model's performance ## - env = HoverAviary(gui=gui, - record=record_video - ) - logger = Logger(logging_freq_hz=int(env.CTRL_FREQ), - num_drones=1, - output_folder=output_folder, - colab=colab - ) - obs, info = env.reset(seed=42, options={}) + if not multiagent: + test_env = HoverAviary(gui=gui, + obs=DEFAULT_OBS, + act=DEFAULT_ACT, + record=record_video) + test_env_nogui = HoverAviary(obs=DEFAULT_OBS, act=DEFAULT_ACT) + else: + test_env = MultiHoverAviary(gui=gui, + num_drones=DEFAULT_AGENTS, + obs=DEFAULT_OBS, + act=DEFAULT_ACT, + record=record_video) + test_env_nogui = MultiHoverAviary(num_drones=DEFAULT_AGENTS, obs=DEFAULT_OBS, act=DEFAULT_ACT) + logger = Logger(logging_freq_hz=int(test_env.CTRL_FREQ), + num_drones=DEFAULT_AGENTS if multiagent else 1, + output_folder=output_folder, + colab=colab + ) + + mean_reward, std_reward = evaluate_policy(model, + test_env_nogui, + n_eval_episodes=10 + ) + print("\n\n\nMean reward ", mean_reward, " +- ", std_reward, "\n\n") + + obs, info = test_env.reset(seed=42, options={}) start = time.time() - for i in range(3*env.CTRL_FREQ): + for i in range((test_env.EPISODE_LEN_SEC+2)*test_env.CTRL_FREQ): action, _states = model.predict(obs, deterministic=True ) - obs, reward, terminated, truncated, info = env.step(action) - logger.log(drone=0, - timestamp=i/env.CTRL_FREQ, - state=np.hstack([obs[0:3], np.zeros(4), obs[3:15], np.resize(action, (4))]), - control=np.zeros(12) - ) - env.render() + obs, reward, terminated, truncated, info = test_env.step(action) + obs2 = obs.squeeze() + act2 = action.squeeze() + print("Obs:", obs, "\tAction", action, "\tReward:", reward, "\tTerminated:", terminated, "\tTruncated:", truncated) + if DEFAULT_OBS == ObservationType.KIN: + if not multiagent: + logger.log(drone=0, + timestamp=i/test_env.CTRL_FREQ, + state=np.hstack([obs2[0:3], + np.zeros(4), + obs2[3:15], + act2 + ]), + control=np.zeros(12) + ) + else: + for d in range(DEFAULT_AGENTS): + logger.log(drone=d, + timestamp=i/test_env.CTRL_FREQ, + state=np.hstack([obs2[d][0:3], + np.zeros(4), + obs2[d][3:15], + act2[d] + ]), + control=np.zeros(12) + ) + test_env.render() print(terminated) - sync(i, start, env.CTRL_TIMESTEP) + sync(i, start, test_env.CTRL_TIMESTEP) if terminated: - obs = env.reset(seed=42, options={}) - env.close() + obs = test_env.reset(seed=42, options={}) + test_env.close() - if plot: + if plot and DEFAULT_OBS == ObservationType.KIN: logger.plot() -if __name__ == "__main__": +if __name__ == '__main__': #### Define and parse (optional) arguments for the script ## - parser = argparse.ArgumentParser(description='Single agent reinforcement learning example script using HoverAviary') - parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='') - parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: 
False)', metavar='') + parser = argparse.ArgumentParser(description='Single agent reinforcement learning example script') + parser.add_argument('--multiagent', default=DEFAULT_MA, type=str2bool, help='Whether to use example LeaderFollower instead of Hover (default: False)', metavar='') + parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='') + parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: False)', metavar='') parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='') - parser.add_argument('--colab', default=DEFAULT_COLAB, type=bool, help='Whether example is being run by a notebook (default: "False")', metavar='') + parser.add_argument('--colab', default=DEFAULT_COLAB, type=bool, help='Whether example is being run by a notebook (default: "False")', metavar='') ARGS = parser.parse_args() run(**vars(ARGS)) - -# """Learning script for single agent problems. - -# Agents are based on `stable_baselines3`'s implementation of A2C, PPO SAC, TD3, DDPG. - -# Example -# ------- -# To run the script, type in a terminal: - -# $ python singleagent.py --env --algo --obs --act --cpu - -# Notes -# ----- -# Use: - -# $ tensorboard --logdir ./results/save-----/tb/ - -# To check the tensorboard results at: - -# http://localhost:6006/ - -# """ -# import os -# import time -# from datetime import datetime -# from sys import platform -# import argparse -# import subprocess -# import numpy as np -# import gymnasium as gym -# import torch -# from stable_baselines3.common.env_checker import check_env -# from stable_baselines3.common.cmd_util import make_vec_env # Module cmd_util will be renamed to env_util https://github.com/DLR-RM/stable-baselines3/pull/197 -# from stable_baselines3.common.vec_env import SubprocVecEnv, VecTransposeImage -# from stable_baselines3.common.utils import set_random_seed -# from stable_baselines3 import A2C -# from stable_baselines3 import PPO -# from stable_baselines3 import SAC -# from stable_baselines3 import TD3 -# from stable_baselines3 import DDPG -# from stable_baselines3.common.policies import ActorCriticPolicy as a2cppoMlpPolicy -# from stable_baselines3.common.policies import ActorCriticCnnPolicy as a2cppoCnnPolicy -# from stable_baselines3.sac.policies import SACPolicy as sacMlpPolicy -# from stable_baselines3.sac import CnnPolicy as sacCnnPolicy -# from stable_baselines3.td3 import MlpPolicy as td3ddpgMlpPolicy -# from stable_baselines3.td3 import CnnPolicy as td3ddpgCnnPolicy -# from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback, StopTrainingOnRewardThreshold - -# from gym_pybullet_drones.envs.single_agent_rl.TakeoffAviary import TakeoffAviary -# from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary -# from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary -# from gym_pybullet_drones.envs.single_agent_rl.TuneAviary import TuneAviary -# from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType - -# import shared_constants - -# EPISODE_REWARD_THRESHOLD = -0 # Upperbound: rewards are always negative, but non-zero -# """float: Reward threshold to halt the script.""" - -# DEFAULT_ENV = 'hover' -# DEFAULT_ALGO = 'ppo' -# DEFAULT_OBS = ObservationType('kin') -# DEFAULT_ACT = ActionType('one_d_rpm') -# DEFAULT_CPU = 1 -# 
DEFAULT_STEPS = 35000 -# DEFAULT_OUTPUT_FOLDER = 'results' - -# def run( -# env=DEFAULT_ENV, -# algo=DEFAULT_ALGO, -# obs=DEFAULT_OBS, -# act=DEFAULT_ACT, -# cpu=DEFAULT_CPU, -# steps=DEFAULT_STEPS, -# output_folder=DEFAULT_OUTPUT_FOLDER -# ): - -# #### Save directory ######################################## -# filename = os.path.join(output_folder, 'save-'+env+'-'+algo+'-'+obs.value+'-'+act.value+'-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S")) -# if not os.path.exists(filename): -# os.makedirs(filename+'/') - -# #### Print out current git commit hash ##################### -# if (platform == "linux" or platform == "darwin") and ('GITHUB_ACTIONS' not in os.environ.keys()): -# git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip() -# with open(filename+'/git_commit.txt', 'w+') as f: -# f.write(str(git_commit)) - -# #### Warning ############################################### -# if env == 'tune' and act != ActionType.TUN: -# print("\n\n\n[WARNING] TuneAviary is intended for use with ActionType.TUN\n\n\n") -# if act == ActionType.ONE_D_RPM or act == ActionType.ONE_D_DYN or act == ActionType.ONE_D_PID: -# print("\n\n\n[WARNING] Simplified 1D problem for debugging purposes\n\n\n") -# #### Errors ################################################ -# if not env in ['takeoff', 'hover']: -# print("[ERROR] 1D action space is only compatible with Takeoff and HoverAviary") -# exit() -# if act == ActionType.TUN and env != 'tune' : -# print("[ERROR] ActionType.TUN is only compatible with TuneAviary") -# exit() -# if algo in ['sac', 'td3', 'ddpg'] and cpu!=1: -# print("[ERROR] The selected algorithm does not support multiple environments") -# exit() - -# #### Uncomment to debug slurm scripts ###################### -# # exit() - -# env_name = env+"-aviary-v0" -# sa_env_kwargs = dict(aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act) -# # train_env = gym.make(env_name, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act) # single environment instead of a vectorized one -# if env_name == "takeoff-aviary-v0": -# train_env = make_vec_env(TakeoffAviary, -# env_kwargs=sa_env_kwargs, -# n_envs=cpu, -# seed=0 -# ) -# if env_name == "hover-aviary-v0": -# train_env = make_vec_env(HoverAviary, -# env_kwargs=sa_env_kwargs, -# n_envs=cpu, -# seed=0 -# ) -# if env_name == "flythrugate-aviary-v0": -# train_env = make_vec_env(FlyThruGateAviary, -# env_kwargs=sa_env_kwargs, -# n_envs=cpu, -# seed=0 -# ) -# if env_name == "tune-aviary-v0": -# train_env = make_vec_env(TuneAviary, -# env_kwargs=sa_env_kwargs, -# n_envs=cpu, -# seed=0 -# ) -# print("[INFO] Action space:", train_env.action_space) -# print("[INFO] Observation space:", train_env.observation_space) -# # check_env(train_env, warn=True, skip_render_check=True) - -# #### On-policy algorithms ################################## -# onpolicy_kwargs = dict(activation_fn=torch.nn.ReLU, -# net_arch=[512, 512, dict(vf=[256, 128], pi=[256, 128])] -# ) # or None -# if algo == 'a2c': -# model = A2C(a2cppoMlpPolicy, -# train_env, -# policy_kwargs=onpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) if obs == ObservationType.KIN else A2C(a2cppoCnnPolicy, -# train_env, -# policy_kwargs=onpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) -# if algo == 'ppo': -# model = PPO(a2cppoMlpPolicy, -# train_env, -# policy_kwargs=onpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) if obs == ObservationType.KIN else PPO(a2cppoCnnPolicy, -# train_env, -# 
policy_kwargs=onpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) - -# #### Off-policy algorithms ################################# -# offpolicy_kwargs = dict(activation_fn=torch.nn.ReLU, -# net_arch=[512, 512, 256, 128] -# ) # or None # or dict(net_arch=dict(qf=[256, 128, 64, 32], pi=[256, 128, 64, 32])) -# if algo == 'sac': -# model = SAC(sacMlpPolicy, -# train_env, -# policy_kwargs=offpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) if obs==ObservationType.KIN else SAC(sacCnnPolicy, -# train_env, -# policy_kwargs=offpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) -# if algo == 'td3': -# model = TD3(td3ddpgMlpPolicy, -# train_env, -# policy_kwargs=offpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) if obs==ObservationType.KIN else TD3(td3ddpgCnnPolicy, -# train_env, -# policy_kwargs=offpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) -# if algo == 'ddpg': -# model = DDPG(td3ddpgMlpPolicy, -# train_env, -# policy_kwargs=offpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) if obs==ObservationType.KIN else DDPG(td3ddpgCnnPolicy, -# train_env, -# policy_kwargs=offpolicy_kwargs, -# tensorboard_log=filename+'/tb/', -# verbose=1 -# ) - -# #### Create eveluation environment ######################### -# if obs == ObservationType.KIN: -# eval_env = gym.make(env_name, -# aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, -# obs=obs, -# act=act -# ) -# elif obs == ObservationType.RGB: -# if env_name == "takeoff-aviary-v0": -# eval_env = make_vec_env(TakeoffAviary, -# env_kwargs=sa_env_kwargs, -# n_envs=1, -# seed=0 -# ) -# if env_name == "hover-aviary-v0": -# eval_env = make_vec_env(HoverAviary, -# env_kwargs=sa_env_kwargs, -# n_envs=1, -# seed=0 -# ) -# if env_name == "flythrugate-aviary-v0": -# eval_env = make_vec_env(FlyThruGateAviary, -# env_kwargs=sa_env_kwargs, -# n_envs=1, -# seed=0 -# ) -# if env_name == "tune-aviary-v0": -# eval_env = make_vec_env(TuneAviary, -# env_kwargs=sa_env_kwargs, -# n_envs=1, -# seed=0 -# ) -# eval_env = VecTransposeImage(eval_env) - -# #### Train the model ####################################### -# # checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=filename+'-logs/', name_prefix='rl_model') -# callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=EPISODE_REWARD_THRESHOLD, -# verbose=1 -# ) -# eval_callback = EvalCallback(eval_env, -# callback_on_new_best=callback_on_best, -# verbose=1, -# best_model_save_path=filename+'/', -# log_path=filename+'/', -# eval_freq=int(2000/cpu), -# deterministic=True, -# render=False -# ) -# model.learn(total_timesteps=steps, #int(1e12), -# callback=eval_callback, -# log_interval=100, -# ) - -# #### Save the model ######################################## -# model.save(filename+'/success_model.zip') -# print(filename) - -# #### Print training progression ############################ -# with np.load(filename+'/evaluations.npz') as data: -# for j in range(data['timesteps'].shape[0]): -# print(str(data['timesteps'][j])+","+str(data['results'][j][0])) - - -# if __name__ == "__main__": -# #### Define and parse (optional) arguments for the script ## -# parser = argparse.ArgumentParser(description='Single agent reinforcement learning experiments script') -# parser.add_argument('--env', default=DEFAULT_ENV, type=str, choices=['takeoff', 'hover', 'flythrugate', 'tune'], help='Task (default: hover)', metavar='') -# parser.add_argument('--algo', default=DEFAULT_ALGO, type=str, choices=['a2c', 
'ppo', 'sac', 'td3', 'ddpg'], help='RL agent (default: ppo)', metavar='') -# parser.add_argument('--obs', default=DEFAULT_OBS, type=ObservationType, help='Observation space (default: kin)', metavar='') -# parser.add_argument('--act', default=DEFAULT_ACT, type=ActionType, help='Action space (default: one_d_rpm)', metavar='') -# parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='') -# parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='') -# parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='') -# ARGS = parser.parse_args() - -# run(**vars(ARGS)) \ No newline at end of file diff --git a/pypi_description.md b/pypi_description.md index 31acb7dcc..c60fe2e9f 100644 --- a/pypi_description.md +++ b/pypi_description.md @@ -1,6 +1,6 @@ # gym-pybullet-drones -[Simple](https://en.wikipedia.org/wiki/KISS_principle) OpenAI [Gym environment](https://gym.openai.com/envs/#classic_control) based on [PyBullet](https://github.com/bulletphysics/bullet3) for multi-agent reinforcement learning with quadrotors +[Simple](https://en.wikipedia.org/wiki/KISS_principle) OpenAI [Gym environment](https://gym.openai.com/envs/#classic_control) based on [PyBullet](https://github.com/bulletphysics/bullet3) for multi-agent reinforcement learning with quadrotors Find the project's published results at [Learning to Fly—a Gym Environment with PyBullet Physics for Reinforcement Learning of Multi-agent Quadcopter Control](https://ieeexplore.ieee.org/abstract/document/9635857). diff --git a/tests/test_examples.py b/tests/test_examples.py index 8df9b1ca0..3a04dd88e 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -12,4 +12,4 @@ def test_downwash(): def test_learn(): from gym_pybullet_drones.examples.learn import run - run(gui=False, plot=False, output_folder='tmp') + run(gui=False, plot=False, output_folder='tmp', local=False)
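
A note on inspecting results from the rewritten `learn.py` above: the script only prints the `EvalCallback` log to the console. As a minimal sketch (assuming the standard `evaluations.npz` layout written by stable-baselines3's `EvalCallback`, i.e. `timesteps` and `results` arrays, and a hypothetical timestamped save folder), the same file can be turned into a learning-curve plot:

```python
# Minimal sketch: plot the mean evaluation return logged by EvalCallback.
# 'results/save-01.01.2024_00.00.00' is a hypothetical placeholder for the
# timestamped folder that learn.py creates.
import numpy as np
import matplotlib.pyplot as plt

filename = 'results/save-01.01.2024_00.00.00'

with np.load(filename + '/evaluations.npz') as data:
    timesteps = data['timesteps']                # env steps at each evaluation
    mean_returns = data['results'].mean(axis=1)  # average over the eval episodes

plt.plot(timesteps, mean_returns)
plt.xlabel('timesteps')
plt.ylabel('mean evaluation return')
plt.title('PPO training progression')
plt.show()
```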
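
Similarly, the updated `test_learn` reuses `learn.run()` with the new `local=False` flag to keep CI training short. Outside the example script, a policy saved by `learn.py` can be reloaded and replayed in a fresh `HoverAviary`; the sketch below is a hedged illustration only (the `best_model.zip` path is a hypothetical placeholder, and the observation/action types mirror `DEFAULT_OBS`/`DEFAULT_ACT` in `learn.py`):

```python
# Minimal sketch: reload a PPO policy saved by learn.py and step it in HoverAviary.
# The model path is a hypothetical placeholder; the real folder name is timestamped.
from stable_baselines3 import PPO

from gym_pybullet_drones.envs.HoverAviary import HoverAviary
from gym_pybullet_drones.utils.enums import ObservationType, ActionType

env = HoverAviary(obs=ObservationType('kin'), act=ActionType('one_d_rpm'))
model = PPO.load('results/save-01.01.2024_00.00.00/best_model.zip')

obs, info = env.reset(seed=42, options={})
for _ in range(int(env.EPISODE_LEN_SEC * env.CTRL_FREQ)):
    action, _ = model.predict(obs, deterministic=True)  # deterministic rollout
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        break
env.close()
```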