diff --git a/.gitignore b/.gitignore
index 98af37ec3..f24b20352 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,6 @@ examples/debug.py
examples/test.py
# NumPy saves and videos
-files/logs/*.npy
-files/videos/*.mp4
results/
tmp/
diff --git a/README.md b/README.md
index c3829e2a9..8ae17360a 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ This is a minimalist refactoring of the original `gym-pybullet-drones` repositor
> **NOTE**: if you prefer to access the original codebase, presented at IROS in 2021, please `git checkout [paper|master]` after cloning the repo, and refer to the corresponding `README.md`'s.
-
+
## Installation
@@ -24,18 +24,27 @@ pip3 install -e . # if needed, `sudo apt install build-essential` to install `gc
## Use
-### PID position control example
+### PID control examples
```sh
cd gym_pybullet_drones/examples/
-python3 pid.py
+python3 pid.py # position and velocity reference
+python3 pid_velocity.py # desired velocity reference
```
-### Stable-baselines3 PPO RL example
+### Downwash effect example
```sh
cd gym_pybullet_drones/examples/
-python3 learn.py
+python3 downwash.py
+```
+
+### Stable-baselines3 PPO RL examples (~3-minute training)
+
+```sh
+cd gym_pybullet_drones/examples/
+python3 learn.py # task: single drone hover at z == 1
+python3 learn.py --multiagent true # task: 2-drone hover at z == 1.2 and 0.7
```
### Betaflight SITL example (Ubuntu only)
@@ -96,13 +105,14 @@ If you wish, please cite our [IROS 2021 paper](https://arxiv.org/abs/2103.02142)
- C. Karen Liu and Dan Negrut (2020) [*The Role of Physics-Based Simulators in Robotics*](https://www.annualreviews.org/doi/pdf/10.1146/annurev-control-072220-093055)
- Yunlong Song, Selim Naji, Elia Kaufmann, Antonio Loquercio, and Davide Scaramuzza (2020) [*Flightmare: A Flexible Quadrotor Simulator*](https://arxiv.org/pdf/2009.00563.pdf)
-## TODO
+## Core Team WIP
+
+- [ ] Multi-drone `crazyflie-firmware` SITL support (@spencerteetaert, @JacopoPan)
+
+## Desired Contributions/PRs
-- [ ] Add `crazyflie-firmware` SITL support @spencerteetaert
-- [ ] Add motor delay @JacopoPan / @spencerteetaert
-- [ ] Replace `rpy` with quaternions (and `ang_vel` with body rates) in `obs` @JacopoPan
-- [ ] Replace `BaseSingleAgentAviary` and `BaseMultiAgentAviary` with a single `RLAviary`, incl. PR #161 @JacopoPan
-- [ ] Add a multi-agent MDP with 2-drone chase through a gate @JacopoPan
+- [ ] Add motor delay by implementing a buffer in `BaseAviary._dynamics()` (see the sketch after this list for one possible approach)
+- [ ] Replace `rpy` with quaternions (and `ang_vel` with body rates) by editing `BaseAviary._updateAndStoreKinematicInformation()`, `BaseAviary._getDroneStateVector()`, and the `.computeObs()` methods of relevant subclasses
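
Purely as an illustration of the motor-delay item above (a hypothetical sketch, not code from this PR or the repository; the class name, method names, and 2-step delay are assumptions), a fixed-length `deque` of past RPM commands could be consulted where `BaseAviary._dynamics()` currently reads the freshly clipped RPMs:

```python
from collections import deque

import numpy as np

MOTOR_DELAY_STEPS = 2  # assumed delay, in physics steps


class DelayedRPMBuffer:
    """Fixed-length FIFO of per-drone RPM commands (hypothetical sketch)."""

    def __init__(self, num_drones: int, delay_steps: int = MOTOR_DELAY_STEPS):
        # Pre-filled with zeros so the first `delay_steps` steps are well defined.
        self._buf = deque(
            [np.zeros((num_drones, 4)) for _ in range(delay_steps)],
            maxlen=delay_steps,
        )

    def apply(self, commanded_rpm: np.ndarray) -> np.ndarray:
        """Store the newest command and return the one issued `delay_steps` ago."""
        delayed = self._buf[0]
        self._buf.append(commanded_rpm)
        return delayed
```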
-----
> University of Toronto's [Dynamic Systems Lab](https://github.com/utiasDSL) / [Vector Institute](https://github.com/VectorInstitute) / University of Cambridge's [Prorok Lab](https://github.com/proroklab)
diff --git a/files/logs/foo.txt b/files/logs/foo.txt
deleted file mode 100644
index e69de29bb..000000000
diff --git a/files/readme_images/2020.gif b/files/readme_images/2020.gif
deleted file mode 100644
index 92bc0fe11..000000000
Binary files a/files/readme_images/2020.gif and /dev/null differ
diff --git a/files/readme_images/dep.gif b/files/readme_images/dep.gif
deleted file mode 100644
index ff67941dd..000000000
Binary files a/files/readme_images/dep.gif and /dev/null differ
diff --git a/files/readme_images/downwash.gif b/files/readme_images/downwash.gif
deleted file mode 100644
index 3ed4372f4..000000000
Binary files a/files/readme_images/downwash.gif and /dev/null differ
diff --git a/files/readme_images/downwash.png b/files/readme_images/downwash.png
deleted file mode 100644
index 34e19a8d2..000000000
Binary files a/files/readme_images/downwash.png and /dev/null differ
diff --git a/files/readme_images/rgb.gif b/files/readme_images/rgb.gif
deleted file mode 100644
index 26c2017cd..000000000
Binary files a/files/readme_images/rgb.gif and /dev/null differ
diff --git a/files/readme_images/seg.gif b/files/readme_images/seg.gif
deleted file mode 100644
index 9b4e0ccae..000000000
Binary files a/files/readme_images/seg.gif and /dev/null differ
diff --git a/gym_pybullet_drones/__init__.py b/gym_pybullet_drones/__init__.py
index 365cbfaab..db2933aa1 100644
--- a/gym_pybullet_drones/__init__.py
+++ b/gym_pybullet_drones/__init__.py
@@ -12,20 +12,10 @@
register(
id='hover-aviary-v0',
- entry_point='gym_pybullet_drones.envs.single_agent_rl:HoverAviary',
+ entry_point='gym_pybullet_drones.envs:HoverAviary',
)
register(
- id='flythrugate-aviary-v0',
- entry_point='gym_pybullet_drones.envs.single_agent_rl:FlyThruGateAviary',
-)
-
-register(
- id='flock-aviary-v0',
- entry_point='gym_pybullet_drones.envs.multi_agent_rl:FlockAviary',
-)
-
-register(
- id='leaderfollower-aviary-v0',
- entry_point='gym_pybullet_drones.envs.multi_agent_rl:LeaderFollowerAviary',
+ id='multihover-aviary-v0',
+ entry_point='gym_pybullet_drones.envs:MultiHoverAviary',
)
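
For context, an illustrative usage sketch (not part of this diff; it only assumes `gymnasium` and this package are installed): the two IDs registered above can then be built through Gymnasium's factory.

```python
import gymnasium as gym

import gym_pybullet_drones  # noqa: F401  -- importing the package runs the register() calls above

env = gym.make("hover-aviary-v0")             # single-drone HoverAviary
multi_env = gym.make("multihover-aviary-v0")  # MultiHoverAviary (2 drones by default)

obs, info = env.reset(seed=42)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
```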
diff --git a/gym_pybullet_drones/assets/beta.txt b/gym_pybullet_drones/assets/beta-presets-bak.txt
similarity index 100%
rename from gym_pybullet_drones/assets/beta.txt
rename to gym_pybullet_drones/assets/beta-presets-bak.txt
diff --git a/gym_pybullet_drones/assets/beta.csv b/gym_pybullet_drones/assets/beta-traj.csv
similarity index 100%
rename from gym_pybullet_drones/assets/beta.csv
rename to gym_pybullet_drones/assets/beta-traj.csv
diff --git a/gym_pybullet_drones/assets/clone_bfs.sh b/gym_pybullet_drones/assets/clone_bfs.sh
index 454c80c5c..aeb2cafed 100755
--- a/gym_pybullet_drones/assets/clone_bfs.sh
+++ b/gym_pybullet_drones/assets/clone_bfs.sh
@@ -18,7 +18,7 @@ cd ../../
mkdir betaflight_sitl/
cd betaflight_sitl/
-# Step 1: Clone and open betaflight's source:
+# Step 1: Clone and open Betaflight's source (at the time of writing, branch `master`, future release 4.5):
git clone https://github.com/betaflight/betaflight temp/
@@ -27,8 +27,6 @@ git clone https://github.com/betaflight/betaflight temp/
# from Betaflight's `SIMULATOR_BUILD`
cd temp/
sed -i "s/delayMicroseconds_real(50);/\/\/delayMicroseconds_real(50);/g" ./src/main/main.c
-sed -i "s/ret = udpInit(\&stateLink, NULL, 9003, true);/\/\/ret = udpInit(\&stateLink, NULL, PORT_STATE, true);/g" ./src/main/target/SITL/sitl.c
-sed -i "s/printf(\"start UDP server.../\/\/printf(\"start UDP server.../g" ./src/main/target/SITL/sitl.c
# Prepare
make arm_sdk_install
@@ -47,7 +45,7 @@ for ((i = 0; i < desired_max_num_drones; i++)); do
cp -r temp/ "bf${i}/"
cd "bf${i}/"
- # Step 3: Change the UDP ports used by each Betaflight SITL instancet
+ # Step 3: Change the UDP ports used by each Betaflight SITL instance
replacement1="PORT_PWM_RAW 90${i}1"
sed -i "s/$pattern1/$replacement1/g" ./src/main/target/SITL/sitl.c
replacement2="PORT_PWM 90${i}2"
diff --git a/files/videos/ffmpeg_png2mp4.sh b/gym_pybullet_drones/assets/ffmpeg_png2mp4.sh
similarity index 100%
rename from files/videos/ffmpeg_png2mp4.sh
rename to gym_pybullet_drones/assets/ffmpeg_png2mp4.sh
diff --git a/files/readme_images/helix.gif b/gym_pybullet_drones/assets/helix.gif
similarity index 100%
rename from files/readme_images/helix.gif
rename to gym_pybullet_drones/assets/helix.gif
diff --git a/files/readme_images/helix.png b/gym_pybullet_drones/assets/helix.png
similarity index 100%
rename from files/readme_images/helix.png
rename to gym_pybullet_drones/assets/helix.png
diff --git a/gym_pybullet_drones/envs/BaseAviary.py b/gym_pybullet_drones/envs/BaseAviary.py
index 5d9a3a39a..8de25c03c 100755
--- a/gym_pybullet_drones/envs/BaseAviary.py
+++ b/gym_pybullet_drones/envs/BaseAviary.py
@@ -61,7 +61,7 @@ def __init__(self,
gui : bool, optional
Whether to use PyBullet's GUI.
record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
+ Whether to save a video of the simulation.
obstacles : bool, optional
Whether to add obstacles to the simulation.
user_debug_gui : bool, optional
@@ -338,7 +338,6 @@ def step(self,
) for i in range(self.NUM_DRONES)]
#### Save, preprocess, and clip the action to the max. RPM #
else:
- self._saveLastAction(action)
clipped_action = np.reshape(self._preprocessAction(action), (self.NUM_DRONES, 4))
#### Repeat for as many as the aggregate physics steps #####
for _ in range(self.PYB_STEPS_PER_CTRL):
@@ -466,7 +465,6 @@ def _housekeeping(self):
self.GUI_INPUT_TEXT = -1*np.ones(self.NUM_DRONES)
self.USE_GUI_RPM=False
self.last_input_switch = 0
- self.last_action = -1*np.ones((self.NUM_DRONES, 4))
self.last_clipped_action = np.zeros((self.NUM_DRONES, 4))
self.gui_input = np.zeros(4)
#### Initialize the drones' kinematic information ##########
@@ -916,23 +914,6 @@ def _normalizedActionToRPM(self,
################################################################################
- def _saveLastAction(self,
- action
- ):
- """Stores the most recent action into attribute `self.last_action`.
-
- The last action can be used to compute aerodynamic effects.
-
- Parameters
- ----------
- action : ndarray
- Ndarray containing the current RPMs input for each drone.
-
- """
- self.last_action = np.reshape(action, (self.NUM_DRONES, 4))
-
- ################################################################################
-
def _showDroneLocalAxes(self,
nth_drone
):
diff --git a/gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py b/gym_pybullet_drones/envs/BaseRLAviary.py
similarity index 81%
rename from gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py
rename to gym_pybullet_drones/envs/BaseRLAviary.py
index 40415449a..e6988e5df 100644
--- a/gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py
+++ b/gym_pybullet_drones/envs/BaseRLAviary.py
@@ -2,19 +2,20 @@
import numpy as np
import pybullet as p
from gymnasium import spaces
+from collections import deque
from gym_pybullet_drones.envs.BaseAviary import BaseAviary
from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType, ImageType
from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl
-class BaseMultiagentAviary(BaseAviary):
- """Base multi-agent environment class for reinforcement learning."""
+class BaseRLAviary(BaseAviary):
+    """Base single- and multi-agent environment class for reinforcement learning."""
################################################################################
def __init__(self,
drone_model: DroneModel=DroneModel.CF2X,
- num_drones: int=2,
+ num_drones: int=1,
neighbourhood_radius: float=np.inf,
initial_xyzs=None,
initial_rpys=None,
@@ -26,7 +27,7 @@ def __init__(self,
obs: ObservationType=ObservationType.KIN,
act: ActionType=ActionType.RPM
):
- """Initialization of a generic multi-agent RL environment.
+        """Initialization of a generic single- or multi-agent RL environment.
Attributes `vision_attributes` and `dynamics_attributes` are selected
based on the choice of `obs` and `act`; `obstacles` is set to True
@@ -54,27 +55,27 @@ def __init__(self,
gui : bool, optional
Whether to use PyBullet's GUI.
record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
+ Whether to save a video of the simulation.
obs : ObservationType, optional
The type of observation space (kinematic information or vision)
act : ActionType, optional
The type of action space (1 or 3D; RPMs, thrust and torques, waypoint or velocity with PID control; etc.)
"""
- if num_drones < 2:
- print("[ERROR] in BaseMultiagentAviary.__init__(), num_drones should be >= 2")
- exit()
+ #### Create a buffer for the last 10 actions ###############
+ self.ACTION_BUFFER_SIZE = 10
+ self.action_buffer = deque(maxlen=self.ACTION_BUFFER_SIZE)
+ ####
vision_attributes = True if obs == ObservationType.RGB else False
self.OBS_TYPE = obs
self.ACT_TYPE = act
- self.EPISODE_LEN_SEC = 5
#### Create integrated controllers #########################
if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID]:
os.environ['KMP_DUPLICATE_LIB_OK']='True'
if drone_model in [DroneModel.CF2X, DroneModel.CF2P]:
self.ctrl = [DSLPIDControl(drone_model=DroneModel.CF2X) for i in range(num_drones)]
else:
- print("[ERROR] in BaseMultiagentAviary.__init()__, no controller is available for the specified drone_model")
+            print("[ERROR] in BaseRLAviary.__init__(), no controller is available for the specified drone_model")
super().__init__(drone_model=drone_model,
num_drones=num_drones,
neighbourhood_radius=neighbourhood_radius,
@@ -144,10 +145,14 @@ def _actionSpace(self):
elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]:
size = 1
else:
- print("[ERROR] in BaseMultiagentAviary._actionSpace()")
+ print("[ERROR] in BaseRLAviary._actionSpace()")
exit()
act_lower_bound = np.array([-1*np.ones(size) for i in range(self.NUM_DRONES)])
act_upper_bound = np.array([+1*np.ones(size) for i in range(self.NUM_DRONES)])
+        #### Pre-fill the action buffer with zero actions ##########
+        for i in range(self.ACTION_BUFFER_SIZE):
+            self.action_buffer.append(np.zeros((self.NUM_DRONES,size)))
+        ############################################################
return spaces.Box(low=act_lower_bound, high=act_upper_bound, dtype=np.float32)
################################################################################
@@ -179,6 +184,7 @@ def _preprocessAction(self,
commanded to the 4 motors of each drone.
"""
+ self.action_buffer.append(action)
rpm = np.zeros((self.NUM_DRONES,4))
for k in range(action.shape[0]):
target = action[k, :]
@@ -219,16 +225,16 @@ def _preprocessAction(self,
rpm[k,:] = np.repeat(self.HOVER_RPM * (1+0.05*target), 4)
elif self.ACT_TYPE == ActionType.ONE_D_PID:
state = self._getDroneStateVector(k)
- rpm, _, _ = self.ctrl[k].computeControl(control_timestep=self.CTRL_TIMESTEP,
+ res, _, _ = self.ctrl[k].computeControl(control_timestep=self.CTRL_TIMESTEP,
cur_pos=state[0:3],
cur_quat=state[3:7],
cur_vel=state[10:13],
cur_ang_vel=state[13:16],
target_pos=state[0:3]+0.1*np.array([0,0,target[0]])
)
- rpm[k,:] = rpm
+ rpm[k,:] = res
else:
- print("[ERROR] in BaseMultiagentAviary._preprocessAction()")
+ print("[ERROR] in BaseRLAviary._preprocessAction()")
exit()
return rpm
@@ -244,28 +250,34 @@ def _observationSpace(self):
"""
if self.OBS_TYPE == ObservationType.RGB:
- # return spaces.Dict({i: spaces.Box(low=0,
- # high=255,
- # shape=(self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8
- # ) for i in range(self.NUM_DRONES)})
return spaces.Box(low=0,
high=255,
shape=(self.NUM_DRONES, self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8)
elif self.OBS_TYPE == ObservationType.KIN:
- ############################################################
- #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS)
- #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3
- # obs_lower_bound = np.array([-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])
- # obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
- # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 )
############################################################
#### OBS SPACE OF SIZE 12
- obs_lower_bound = np.array([[-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1] for i in range(self.NUM_DRONES)])
- obs_upper_bound = np.array([[1,1,1, 1,1,1, 1,1,1, 1,1,1] for i in range(self.NUM_DRONES)])
+            #### Observation vector ### X Y Z R P Y VX VY VZ WX WY WZ
+ lo = -np.inf
+ hi = np.inf
+ obs_lower_bound = np.array([[lo,lo,0, lo,lo,lo,lo,lo,lo,lo,lo,lo] for i in range(self.NUM_DRONES)])
+ obs_upper_bound = np.array([[hi,hi,hi,hi,hi,hi,hi,hi,hi,hi,hi,hi] for i in range(self.NUM_DRONES)])
+ #### Add action buffer to observation space ################
+ act_lo = -1
+ act_hi = +1
+ for i in range(self.ACTION_BUFFER_SIZE):
+ if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]:
+ obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])])
+ obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])])
+ elif self.ACT_TYPE==ActionType.PID:
+ obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])])
+ obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])])
+ elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]:
+ obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo] for i in range(self.NUM_DRONES)])])
+ obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi] for i in range(self.NUM_DRONES)])])
return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32)
############################################################
else:
- print("[ERROR] in BaseMultiagentAviary._observationSpace()")
+ print("[ERROR] in BaseRLAviary._observationSpace()")
################################################################################
@@ -291,35 +303,20 @@ def _computeObs(self):
path=self.ONBOARD_IMG_PATH+"drone_"+str(i),
frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ)
)
- return np.array([self.rgb[i] for i in range(self.NUM_DRONES)])
+ return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]).astype('float32')
elif self.OBS_TYPE == ObservationType.KIN:
- ############################################################
- #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS)
- # return { i : self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) }
############################################################
#### OBS SPACE OF SIZE 12
obs_12 = np.zeros((self.NUM_DRONES,12))
for i in range(self.NUM_DRONES):
- obs = self._clipAndNormalizeState(self._getDroneStateVector(i))
+            obs = self._getDroneStateVector(i)
obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,)
- return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)])
+ ret = np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32')
+ #### Add action buffer to observation #######################
+ for i in range(self.ACTION_BUFFER_SIZE):
+ ret = np.hstack([ret, np.array([self.action_buffer[i][j, :] for j in range(self.NUM_DRONES)])])
+ return ret
############################################################
else:
- print("[ERROR] in BaseMultiagentAviary._computeObs()")
-
- ################################################################################
-
- def _clipAndNormalizeState(self,
- state
- ):
- """Normalizes a drone's state to the [-1,1] range.
-
- Must be implemented in a subclass.
-
- Parameters
- ----------
- state : ndarray
- Array containing the non-normalized state of a single drone.
-
- """
- raise NotImplementedError
+ print("[ERROR] in BaseRLAviary._computeObs()")
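
To make the observation layout introduced above concrete, here is a small back-of-the-envelope check (illustrative only; the constants restate values already set in `BaseRLAviary`, and the action sizes mirror `_actionSpace()`):

```python
# Per-drone KIN observation width once the 10-step action buffer is appended.
ACTION_BUFFER_SIZE = 10   # deque length created in BaseRLAviary.__init__()
KIN_OBS_SIZE = 12         # X Y Z R P Y VX VY VZ WX WY WZ

ACTION_SIZES = {"RPM": 4, "VEL": 4, "PID": 3, "ONE_D_RPM": 1, "ONE_D_PID": 1}

for act_type, act_size in ACTION_SIZES.items():
    width = KIN_OBS_SIZE + ACTION_BUFFER_SIZE * act_size
    print(f"{act_type:9s} -> {width} values per drone")  # e.g. RPM -> 52
```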
diff --git a/gym_pybullet_drones/envs/BetaAviary.py b/gym_pybullet_drones/envs/BetaAviary.py
index 6cf6caee6..e4d37a2e4 100644
--- a/gym_pybullet_drones/envs/BetaAviary.py
+++ b/gym_pybullet_drones/envs/BetaAviary.py
@@ -59,7 +59,7 @@ def __init__(self,
gui : bool, optional
Whether to use PyBullet's GUI.
record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
+ Whether to save a video of the simulation.
obstacles : bool, optional
Whether to add obstacles to the simulation.
user_debug_gui : bool, optional
diff --git a/gym_pybullet_drones/envs/CtrlAviary.py b/gym_pybullet_drones/envs/CtrlAviary.py
index 3dc127217..b3ba8efee 100644
--- a/gym_pybullet_drones/envs/CtrlAviary.py
+++ b/gym_pybullet_drones/envs/CtrlAviary.py
@@ -47,7 +47,7 @@ def __init__(self,
gui : bool, optional
Whether to use PyBullet's GUI.
record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
+ Whether to save a video of the simulation.
obstacles : bool, optional
Whether to add obstacles to the simulation.
user_debug_gui : bool, optional
diff --git a/gym_pybullet_drones/envs/HoverAviary.py b/gym_pybullet_drones/envs/HoverAviary.py
new file mode 100644
index 000000000..165b35c32
--- /dev/null
+++ b/gym_pybullet_drones/envs/HoverAviary.py
@@ -0,0 +1,127 @@
+import numpy as np
+
+from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary
+from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType
+
+class HoverAviary(BaseRLAviary):
+ """Single agent RL problem: hover at position."""
+
+ ################################################################################
+
+ def __init__(self,
+ drone_model: DroneModel=DroneModel.CF2X,
+ initial_xyzs=None,
+ initial_rpys=None,
+ physics: Physics=Physics.PYB,
+ pyb_freq: int = 240,
+ ctrl_freq: int = 30,
+ gui=False,
+ record=False,
+ obs: ObservationType=ObservationType.KIN,
+ act: ActionType=ActionType.RPM
+ ):
+ """Initialization of a single agent RL environment.
+
+ Using the generic single agent RL superclass.
+
+ Parameters
+ ----------
+ drone_model : DroneModel, optional
+ The desired drone type (detailed in an .urdf file in folder `assets`).
+ initial_xyzs: ndarray | None, optional
+ (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones.
+ initial_rpys: ndarray | None, optional
+ (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians).
+ physics : Physics, optional
+ The desired implementation of PyBullet physics/custom dynamics.
+ pyb_freq : int, optional
+ The frequency at which PyBullet steps (a multiple of ctrl_freq).
+ ctrl_freq : int, optional
+ The frequency at which the environment steps.
+ gui : bool, optional
+ Whether to use PyBullet's GUI.
+ record : bool, optional
+ Whether to save a video of the simulation.
+ obs : ObservationType, optional
+ The type of observation space (kinematic information or vision)
+ act : ActionType, optional
+            The type of action space (1 or 3D; RPMs, thrust and torques, or waypoint with PID control)
+
+ """
+ self.TARGET_POS = np.array([0,0,1])
+ self.EPISODE_LEN_SEC = 8
+ super().__init__(drone_model=drone_model,
+ num_drones=1,
+ initial_xyzs=initial_xyzs,
+ initial_rpys=initial_rpys,
+ physics=physics,
+ pyb_freq=pyb_freq,
+ ctrl_freq=ctrl_freq,
+ gui=gui,
+ record=record,
+ obs=obs,
+ act=act
+ )
+
+ ################################################################################
+
+ def _computeReward(self):
+ """Computes the current reward value.
+
+ Returns
+ -------
+ float
+ The reward.
+
+ """
+ state = self._getDroneStateVector(0)
+ ret = max(0, 2 - np.linalg.norm(self.TARGET_POS-state[0:3])**4)
+ return ret
+
+ ################################################################################
+
+ def _computeTerminated(self):
+ """Computes the current done value.
+
+ Returns
+ -------
+ bool
+ Whether the current episode is done.
+
+ """
+ state = self._getDroneStateVector(0)
+ if np.linalg.norm(self.TARGET_POS-state[0:3]) < .0001:
+ return True
+ else:
+ return False
+
+ ################################################################################
+
+ def _computeTruncated(self):
+ """Computes the current truncated value.
+
+ Returns
+ -------
+ bool
+ Whether the current episode timed out.
+
+ """
+ if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC:
+ return True
+ else:
+ return False
+
+ ################################################################################
+
+ def _computeInfo(self):
+ """Computes the current info dict(s).
+
+ Unused.
+
+ Returns
+ -------
+ dict[str, int]
+ Dummy value.
+
+ """
+ return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years
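
As a rough illustration of how `HoverAviary` pairs with the PPO example referenced in the README (a minimal sketch assuming `stable-baselines3` is installed; the hyperparameters are placeholders, not the settings used in `examples/learn.py`):

```python
from stable_baselines3 import PPO

from gym_pybullet_drones.envs.HoverAviary import HoverAviary
from gym_pybullet_drones.utils.enums import ActionType, ObservationType

env = HoverAviary(obs=ObservationType.KIN, act=ActionType.RPM)
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10_000)  # learn.py trains for much longer

obs, info = env.reset(seed=0)
action, _ = model.predict(obs, deterministic=True)
obs, reward, terminated, truncated, info = env.step(action)
```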
diff --git a/gym_pybullet_drones/envs/MultiHoverAviary.py b/gym_pybullet_drones/envs/MultiHoverAviary.py
new file mode 100644
index 000000000..f1616cf5f
--- /dev/null
+++ b/gym_pybullet_drones/envs/MultiHoverAviary.py
@@ -0,0 +1,140 @@
+import numpy as np
+
+from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary
+from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType
+
+class MultiHoverAviary(BaseRLAviary):
+    """Multi-agent RL problem: hover at target positions."""
+
+ ################################################################################
+
+ def __init__(self,
+ drone_model: DroneModel=DroneModel.CF2X,
+ num_drones: int=2,
+ neighbourhood_radius: float=np.inf,
+ initial_xyzs=None,
+ initial_rpys=None,
+ physics: Physics=Physics.PYB,
+ pyb_freq: int = 240,
+ ctrl_freq: int = 30,
+ gui=False,
+ record=False,
+ obs: ObservationType=ObservationType.KIN,
+ act: ActionType=ActionType.RPM
+ ):
+ """Initialization of a multi-agent RL environment.
+
+ Using the generic multi-agent RL superclass.
+
+ Parameters
+ ----------
+ drone_model : DroneModel, optional
+ The desired drone type (detailed in an .urdf file in folder `assets`).
+ num_drones : int, optional
+ The desired number of drones in the aviary.
+ neighbourhood_radius : float, optional
+ Radius used to compute the drones' adjacency matrix, in meters.
+ initial_xyzs: ndarray | None, optional
+ (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones.
+ initial_rpys: ndarray | None, optional
+ (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians).
+ physics : Physics, optional
+ The desired implementation of PyBullet physics/custom dynamics.
+ pyb_freq : int, optional
+ The frequency at which PyBullet steps (a multiple of ctrl_freq).
+ ctrl_freq : int, optional
+ The frequency at which the environment steps.
+ gui : bool, optional
+ Whether to use PyBullet's GUI.
+ record : bool, optional
+ Whether to save a video of the simulation.
+ obs : ObservationType, optional
+ The type of observation space (kinematic information or vision)
+ act : ActionType, optional
+            The type of action space (1 or 3D; RPMs, thrust and torques, or waypoint with PID control)
+
+ """
+ self.EPISODE_LEN_SEC = 8
+ super().__init__(drone_model=drone_model,
+ num_drones=num_drones,
+ neighbourhood_radius=neighbourhood_radius,
+ initial_xyzs=initial_xyzs,
+ initial_rpys=initial_rpys,
+ physics=physics,
+ pyb_freq=pyb_freq,
+ ctrl_freq=ctrl_freq,
+ gui=gui,
+ record=record,
+ obs=obs,
+ act=act
+ )
+
+ self.TARGET_POS = self.INIT_XYZS + np.array([[0,0,1/(i+1)] for i in range(num_drones)])
+
+ ################################################################################
+
+ def _computeReward(self):
+ """Computes the current reward value.
+
+ Returns
+ -------
+ float
+ The reward.
+
+ """
+ states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
+ ret = 0
+ for i in range(self.NUM_DRONES):
+ ret += max(0, 2 - np.linalg.norm(self.TARGET_POS[i,:]-states[i][0:3])**4)
+ return ret
+
+ ################################################################################
+
+ def _computeTerminated(self):
+ """Computes the current done value.
+
+ Returns
+ -------
+ bool
+ Whether the current episode is done.
+
+ """
+ states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
+ dist = 0
+ for i in range(self.NUM_DRONES):
+ dist += np.linalg.norm(self.TARGET_POS[i,:]-states[i][0:3])
+ if dist < .0001:
+ return True
+ else:
+ return False
+
+ ################################################################################
+
+ def _computeTruncated(self):
+ """Computes the current truncated value.
+
+ Returns
+ -------
+ bool
+ Whether the current episode timed out.
+
+ """
+ if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC:
+ return True
+ else:
+ return False
+
+ ################################################################################
+
+ def _computeInfo(self):
+ """Computes the current info dict(s).
+
+ Unused.
+
+ Returns
+ -------
+ dict[str, int]
+ Dummy value.
+
+ """
+ return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years
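
For intuition on the `TARGET_POS` layout defined above, a worked example (the 0.2 m spawn altitude is an assumption used only for illustration, not a value stated in this diff): each drone's target sits `1/(i+1)` m above its starting point.

```python
import numpy as np

num_drones = 2
init_xyzs = np.array([[0.0, 0.0, 0.2], [0.0, 0.3, 0.2]])  # assumed spawn positions
target_pos = init_xyzs + np.array([[0, 0, 1 / (i + 1)] for i in range(num_drones)])
print(target_pos[:, 2])  # -> [1.2 0.7], the hover heights quoted in the README
```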
diff --git a/gym_pybullet_drones/envs/RLAviary.py b/gym_pybullet_drones/envs/RLAviary.py
deleted file mode 100644
index bbf5bdb4b..000000000
--- a/gym_pybullet_drones/envs/RLAviary.py
+++ /dev/null
@@ -1,2 +0,0 @@
-'''TBD
-'''
diff --git a/gym_pybullet_drones/envs/VelocityAviary.py b/gym_pybullet_drones/envs/VelocityAviary.py
index c522c80df..56416ef8b 100644
--- a/gym_pybullet_drones/envs/VelocityAviary.py
+++ b/gym_pybullet_drones/envs/VelocityAviary.py
@@ -49,7 +49,7 @@ def __init__(self,
gui : bool, optional
Whether to use PyBullet's GUI.
record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
+ Whether to save a video of the simulation.
obstacles : bool, optional
Whether to add obstacles to the simulation.
user_debug_gui : bool, optional
diff --git a/gym_pybullet_drones/envs/__init__.py b/gym_pybullet_drones/envs/__init__.py
index d630927c1..fc27b99d2 100644
--- a/gym_pybullet_drones/envs/__init__.py
+++ b/gym_pybullet_drones/envs/__init__.py
@@ -1,2 +1,5 @@
+from gym_pybullet_drones.envs.BetaAviary import BetaAviary
from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary
+from gym_pybullet_drones.envs.HoverAviary import HoverAviary
+from gym_pybullet_drones.envs.MultiHoverAviary import MultiHoverAviary
from gym_pybullet_drones.envs.VelocityAviary import VelocityAviary
diff --git a/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py b/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py
deleted file mode 100644
index b28f8accd..000000000
--- a/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py
+++ /dev/null
@@ -1,265 +0,0 @@
-import math
-import numpy as np
-
-from gym_pybullet_drones.utils.enums import DroneModel, Physics
-from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType
-from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary
-
-class FlockAviary(BaseMultiagentAviary):
- """Multi-agent RL problem: flocking."""
-
- ################################################################################
-
- def __init__(self,
- drone_model: DroneModel=DroneModel.CF2X,
- num_drones: int=2,
- neighbourhood_radius: float=np.inf,
- initial_xyzs=None,
- initial_rpys=None,
- physics: Physics=Physics.PYB,
- pyb_freq: int = 240,
- ctrl_freq: int = 240,
- gui=False,
- record=False,
- obs: ObservationType=ObservationType.KIN,
- act: ActionType=ActionType.RPM):
- """Initialization of a multi-agent RL environment.
-
- Using the generic multi-agent RL superclass.
-
- Parameters
- ----------
- drone_model : DroneModel, optional
- The desired drone type (detailed in an .urdf file in folder `assets`).
- num_drones : int, optional
- The desired number of drones in the aviary.
- neighbourhood_radius : float, optional
- Radius used to compute the drones' adjacency matrix, in meters.
- initial_xyzs: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones.
- initial_rpys: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians).
- physics : Physics, optional
- The desired implementation of PyBullet physics/custom dynamics.
- pyb_freq : int, optional
- The frequency at which PyBullet steps (a multiple of ctrl_freq).
- ctrl_freq : int, optional
- The frequency at which the environment steps.
- gui : bool, optional
- Whether to use PyBullet's GUI.
- record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
- obs : ObservationType, optional
- The type of observation space (kinematic information or vision)
- act : ActionType, optional
- The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control)
-
- """
- super().__init__(drone_model=drone_model,
- num_drones=num_drones,
- neighbourhood_radius=neighbourhood_radius,
- initial_xyzs=initial_xyzs,
- initial_rpys=initial_rpys,
- physics=physics,
- pyb_freq=pyb_freq,
- ctrl_freq=ctrl_freq,
- gui=gui,
- record=record,
- obs=obs,
- act=act
- )
-
- ################################################################################
-
- def _computeReward(self):
- """Computes the current reward value(s).
-
- Returns
- -------
- dict[int, float]
- The reward value for each drone.
-
- """
- rewards = {}
- states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
- rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 1]) - states[0, 0:3])**2
- for i in range(1, self.NUM_DRONES):
- rewards[i] = -1 * np.linalg.norm(states[i-1, 2] - states[i, 2])**2
- return rewards
- """
- # obs here is dictionary of the form {"i":{"state": Box(20,), "neighbors": MultiBinary(NUM_DRONES)}}
- # parse velocity and position
- vel = np.zeros((1, self.NUM_DRONES, 3)); pos = np.zeros((1, self.NUM_DRONES, 3))
- for i in range(self.NUM_DRONES):
- pos[0,i,:] = obs[ i ]["state"][0:3]
- vel[0,i,:] = obs[ i ]["state"][10:13]
- # compute metrics
- # velocity alignment
- ali = 0
- EPSILON = 1e-3 # avoid divide by zero
- linear_vel_norm = np.linalg.norm(vel, axis=2)
- for i in range(self.NUM_DRONES):
- for j in range(self.NUM_DRONES):
- if j != i:
- d = np.einsum('ij,ij->i', vel[:, i, :], vel[:, j, :])
- ali += (d / (linear_vel_norm[:, i] + EPSILON) / (linear_vel_norm[:, j] + EPSILON))
- ali /= (self.NUM_DRONES * (self.NUM_DRONES - 1))
- # flocking speed
- cof_v = np.mean(vel, axis=1) # center of flock speed
- avg_flock_linear_speed = np.linalg.norm(cof_v, axis=-1)
- # spacing
- whole_flock_spacing = []
- for i in range(self.NUM_DRONES):
- flck_neighbor_pos = np.delete(pos, [i], 1)
- drone_neighbor_pos_diff = flck_neighbor_pos - np.reshape(pos[:, i, :], (pos[:, i, :].shape[0], 1, -1))
- drone_neighbor_dis = np.linalg.norm(drone_neighbor_pos_diff, axis=-1)
- drone_spacing = np.amin(drone_neighbor_dis, axis=-1)
- whole_flock_spacing.append(drone_spacing)
- whole_flock_spacing = np.stack(whole_flock_spacing, axis=-1)
- avg_flock_spacing = np.mean(whole_flock_spacing, axis=-1)
- var_flock_spacing = np.var(whole_flock_spacing, axis=-1)
- # flocking metrics
- FLOCK_SPACING_MIN = 1.0; FLOCK_SPACING_MAX = 3.0
- if FLOCK_SPACING_MIN < avg_flock_spacing[0] < FLOCK_SPACING_MAX:
- avg_flock_spac_rew = 0.0
- else:
- avg_flock_spac_rew = min(math.fabs(avg_flock_spacing[0] - FLOCK_SPACING_MIN),
- math.fabs(avg_flock_spacing[0] - FLOCK_SPACING_MAX))
- reward = ali[0] + avg_flock_linear_speed[0] - avg_flock_spac_rew - var_flock_spacing[0]
- return { i : reward for i in range(self.NUM_DRONES) }
- """
-
- ################################################################################
-
- def _computeTerminated(self):
- """Computes the current done value(s).
-
- Returns
- -------
- dict[int | "__all__", bool]
- Dictionary with the done value of each drone and
- one additional boolean value for key "__all__".
-
- """
- bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False
- done = {i: bool_val for i in range(self.NUM_DRONES)}
- done["__all__"] = True if True in done.values() else False
- return done
-
- ################################################################################
-
- def _computeTruncated(self):
- """Computes the current truncated value(s).
-
- Unused in this implementation.
-
- Returns
- -------
- bool
- Always false.
-
- """
- return False
-
- ################################################################################
-
- def _computeInfo(self):
- """Computes the current info dict(s).
-
- Unused.
-
- Returns
- -------
- dict[int, dict[]]
- Dictionary of empty dictionaries.
-
- """
- return {i: {} for i in range(self.NUM_DRONES)}
-
- ################################################################################
-
- def _clipAndNormalizeState(self,
- state
- ):
- """Normalizes a drone's state to the [-1,1] range.
-
- Parameters
- ----------
- state : ndarray
- (20,)-shaped array of floats containing the non-normalized state of a single drone.
-
- Returns
- -------
- ndarray
- (20,)-shaped array of floats containing the normalized state of a single drone.
-
- """
- MAX_LIN_VEL_XY = 3
- MAX_LIN_VEL_Z = 1
-
- MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC
- MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC
-
- MAX_PITCH_ROLL = np.pi # Full range
-
- clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY)
- clipped_pos_z = np.clip(state[2], 0, MAX_Z)
- clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL)
- clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY)
- clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z)
-
- if self.GUI:
- self._clipAndNormalizeStateWarning(state,
- clipped_pos_xy,
- clipped_pos_z,
- clipped_rp,
- clipped_vel_xy,
- clipped_vel_z
- )
-
- normalized_pos_xy = clipped_pos_xy / MAX_XY
- normalized_pos_z = clipped_pos_z / MAX_Z
- normalized_rp = clipped_rp / MAX_PITCH_ROLL
- normalized_y = state[9] / np.pi # No reason to clip
- normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY
- normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY
- normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16]
-
- norm_and_clipped = np.hstack([normalized_pos_xy,
- normalized_pos_z,
- state[3:7],
- normalized_rp,
- normalized_y,
- normalized_vel_xy,
- normalized_vel_z,
- normalized_ang_vel,
- state[16:20]
- ]).reshape(20,)
-
- return norm_and_clipped
-
- ################################################################################
-
- def _clipAndNormalizeStateWarning(self,
- state,
- clipped_pos_xy,
- clipped_pos_z,
- clipped_rp,
- clipped_vel_xy,
- clipped_vel_z,
- ):
- """Debugging printouts associated to `_clipAndNormalizeState`.
-
- Print a warning if values in a state vector is out of the clipping range.
-
- """
- if not(clipped_pos_xy == np.array(state[0:2])).all():
- print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1]))
- if not(clipped_pos_z == np.array(state[2])).all():
- print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2]))
- if not(clipped_rp == np.array(state[7:9])).all():
- print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8]))
- if not(clipped_vel_xy == np.array(state[10:12])).all():
- print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11]))
- if not(clipped_vel_z == np.array(state[12])).all():
- print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12]))
diff --git a/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py b/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py
deleted file mode 100644
index 4722ac2aa..000000000
--- a/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py
+++ /dev/null
@@ -1,223 +0,0 @@
-import numpy as np
-
-from gym_pybullet_drones.utils.enums import DroneModel, Physics
-from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType
-from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary
-
-class LeaderFollowerAviary(BaseMultiagentAviary):
- """Multi-agent RL problem: leader-follower."""
-
- ################################################################################
-
- def __init__(self,
- drone_model: DroneModel=DroneModel.CF2X,
- num_drones: int=2,
- neighbourhood_radius: float=np.inf,
- initial_xyzs=None,
- initial_rpys=None,
- physics: Physics=Physics.PYB,
- pyb_freq: int = 240,
- ctrl_freq: int = 240,
- gui=False,
- record=False,
- obs: ObservationType=ObservationType.KIN,
- act: ActionType=ActionType.RPM):
- """Initialization of a multi-agent RL environment.
-
- Using the generic multi-agent RL superclass.
-
- Parameters
- ----------
- drone_model : DroneModel, optional
- The desired drone type (detailed in an .urdf file in folder `assets`).
- num_drones : int, optional
- The desired number of drones in the aviary.
- neighbourhood_radius : float, optional
- Radius used to compute the drones' adjacency matrix, in meters.
- initial_xyzs: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones.
- initial_rpys: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians).
- physics : Physics, optional
- The desired implementation of PyBullet physics/custom dynamics.
- pyb_freq : int, optional
- The frequency at which PyBullet steps (a multiple of ctrl_freq).
- ctrl_freq : int, optional
- The frequency at which the environment steps.
- gui : bool, optional
- Whether to use PyBullet's GUI.
- record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
- obs : ObservationType, optional
- The type of observation space (kinematic information or vision)
- act : ActionType, optional
- The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control)
-
- """
- super().__init__(drone_model=drone_model,
- num_drones=num_drones,
- neighbourhood_radius=neighbourhood_radius,
- initial_xyzs=initial_xyzs,
- initial_rpys=initial_rpys,
- physics=physics,
- pyb_freq=pyb_freq,
- ctrl_freq=ctrl_freq,
- gui=gui,
- record=record,
- obs=obs,
- act=act
- )
-
- ################################################################################
-
- def _computeReward(self):
- """Computes the current reward value(s).
-
- Returns
- -------
- dict[int, float]
- The reward value for each drone.
-
- """
- rewards = {}
- states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
- rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 0.5]) - states[0, 0:3])**2
- # rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD
- for i in range(1, self.NUM_DRONES):
- rewards[i] = -(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2
- return rewards
-
- ################################################################################
-
- def _computeTerminated(self):
- """Computes the current done value(s).
-
- Returns
- -------
- dict[int | "__all__", bool]
- Dictionary with the done value of each drone and
- one additional boolean value for key "__all__".
-
- """
- bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False
- done = {i: bool_val for i in range(self.NUM_DRONES)}
- done["__all__"] = bool_val # True if True in done.values() else False
- return done
-
- ################################################################################
-
- def _computeTruncated(self):
- """Computes the current truncated value(s).
-
- Unused in this implementation.
-
- Returns
- -------
- bool
- Always false.
-
- """
- return False
-
- ################################################################################
-
- def _computeInfo(self):
- """Computes the current info dict(s).
-
- Unused.
-
- Returns
- -------
- dict[int, dict[]]
- Dictionary of empty dictionaries.
-
- """
- return {i: {} for i in range(self.NUM_DRONES)}
-
- ################################################################################
-
- def _clipAndNormalizeState(self,
- state
- ):
- """Normalizes a drone's state to the [-1,1] range.
-
- Parameters
- ----------
- state : ndarray
- (20,)-shaped array of floats containing the non-normalized state of a single drone.
-
- Returns
- -------
- ndarray
- (20,)-shaped array of floats containing the normalized state of a single drone.
-
- """
- MAX_LIN_VEL_XY = 3
- MAX_LIN_VEL_Z = 1
-
- MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC
- MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC
-
- MAX_PITCH_ROLL = np.pi # Full range
-
- clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY)
- clipped_pos_z = np.clip(state[2], 0, MAX_Z)
- clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL)
- clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY)
- clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z)
-
- if self.GUI:
- self._clipAndNormalizeStateWarning(state,
- clipped_pos_xy,
- clipped_pos_z,
- clipped_rp,
- clipped_vel_xy,
- clipped_vel_z
- )
-
- normalized_pos_xy = clipped_pos_xy / MAX_XY
- normalized_pos_z = clipped_pos_z / MAX_Z
- normalized_rp = clipped_rp / MAX_PITCH_ROLL
- normalized_y = state[9] / np.pi # No reason to clip
- normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY
- normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY
- normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16]
-
- norm_and_clipped = np.hstack([normalized_pos_xy,
- normalized_pos_z,
- state[3:7],
- normalized_rp,
- normalized_y,
- normalized_vel_xy,
- normalized_vel_z,
- normalized_ang_vel,
- state[16:20]
- ]).reshape(20,)
-
- return norm_and_clipped
-
- ################################################################################
-
- def _clipAndNormalizeStateWarning(self,
- state,
- clipped_pos_xy,
- clipped_pos_z,
- clipped_rp,
- clipped_vel_xy,
- clipped_vel_z,
- ):
- """Debugging printouts associated to `_clipAndNormalizeState`.
-
- Print a warning if values in a state vector is out of the clipping range.
-
- """
- if not(clipped_pos_xy == np.array(state[0:2])).all():
- print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1]))
- if not(clipped_pos_z == np.array(state[2])).all():
- print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2]))
- if not(clipped_rp == np.array(state[7:9])).all():
- print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8]))
- if not(clipped_vel_xy == np.array(state[10:12])).all():
- print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11]))
- if not(clipped_vel_z == np.array(state[12])).all():
- print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12]))
diff --git a/gym_pybullet_drones/envs/multi_agent_rl/__init__.py b/gym_pybullet_drones/envs/multi_agent_rl/__init__.py
deleted file mode 100644
index 09197c8d5..000000000
--- a/gym_pybullet_drones/envs/multi_agent_rl/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary
-from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary
-from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary
diff --git a/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py b/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py
deleted file mode 100644
index 5b4745cb3..000000000
--- a/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py
+++ /dev/null
@@ -1,305 +0,0 @@
-import os
-from enum import Enum
-import numpy as np
-from gymnasium import spaces
-import pybullet as p
-
-from gym_pybullet_drones.envs.BaseAviary import BaseAviary
-from gym_pybullet_drones.utils.enums import DroneModel, Physics, ImageType, ActionType, ObservationType
-from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl
-
-
-class BaseSingleAgentAviary(BaseAviary):
- """Base single drone environment class for reinforcement learning."""
-
- ################################################################################
-
- def __init__(self,
- drone_model: DroneModel=DroneModel.CF2X,
- initial_xyzs=None,
- initial_rpys=None,
- physics: Physics=Physics.PYB,
- pyb_freq: int = 240,
- ctrl_freq: int = 240,
- gui=False,
- record=False,
- obs: ObservationType=ObservationType.KIN,
- act: ActionType=ActionType.RPM
- ):
- """Initialization of a generic single agent RL environment.
-
- Attribute `num_drones` is automatically set to 1; `vision_attributes`
- are selected based on the choice of `obs` and `act`; `obstacles` is
- set to True and overridden with landmarks for vision applications;
- `user_debug_gui` is set to False for performance.
-
- Parameters
- ----------
- drone_model : DroneModel, optional
- The desired drone type (detailed in an .urdf file in folder `assets`).
- initial_xyzs: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones.
- initial_rpys: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians).
- physics : Physics, optional
- The desired implementation of PyBullet physics/custom dynamics.
- pyb_freq : int, optional
- The frequency at which PyBullet steps (a multiple of ctrl_freq).
- ctrl_freq : int, optional
- The frequency at which the environment steps.
- gui : bool, optional
- Whether to use PyBullet's GUI.
- record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
- obs : ObservationType, optional
- The type of observation space (kinematic information or vision)
- act : ActionType, optional
- The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.)
-
- """
- vision_attributes = True if obs == ObservationType.RGB else False
- self.OBS_TYPE = obs
- self.ACT_TYPE = act
- self.EPISODE_LEN_SEC = 5
- #### Create integrated controllers #########################
- if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID]:
- os.environ['KMP_DUPLICATE_LIB_OK']='True'
- if drone_model in [DroneModel.CF2X, DroneModel.CF2P]:
- self.ctrl = DSLPIDControl(drone_model=DroneModel.CF2X)
- else:
- print("[ERROR] in BaseSingleAgentAviary.__init()__, no controller is available for the specified drone_model")
- super().__init__(drone_model=drone_model,
- num_drones=1,
- initial_xyzs=initial_xyzs,
- initial_rpys=initial_rpys,
- physics=physics,
- pyb_freq=pyb_freq,
- ctrl_freq=ctrl_freq,
- gui=gui,
- record=record,
- obstacles=True, # Add obstacles for RGB observations and/or FlyThruGate
- user_debug_gui=False, # Remove of RPM sliders from all single agent learning aviaries
- vision_attributes=vision_attributes,
- )
- #### Set a limit on the maximum target speed ###############
- if act == ActionType.VEL:
- self.SPEED_LIMIT = 0.03 * self.MAX_SPEED_KMH * (1000/3600)
-
- ################################################################################
-
- def _addObstacles(self):
- """Add obstacles to the environment.
-
- Only if the observation is of type RGB, 4 landmarks are added.
- Overrides BaseAviary's method.
-
- """
- if self.OBS_TYPE == ObservationType.RGB:
- p.loadURDF("block.urdf",
- [1, 0, .1],
- p.getQuaternionFromEuler([0, 0, 0]),
- physicsClientId=self.CLIENT
- )
- p.loadURDF("cube_small.urdf",
- [0, 1, .1],
- p.getQuaternionFromEuler([0, 0, 0]),
- physicsClientId=self.CLIENT
- )
- p.loadURDF("duck_vhacd.urdf",
- [-1, 0, .1],
- p.getQuaternionFromEuler([0, 0, 0]),
- physicsClientId=self.CLIENT
- )
- p.loadURDF("teddy_vhacd.urdf",
- [0, -1, .1],
- p.getQuaternionFromEuler([0, 0, 0]),
- physicsClientId=self.CLIENT
- )
- else:
- pass
-
- ################################################################################
-
- def _actionSpace(self):
- """Returns the action space of the environment.
-
- Returns
- -------
- ndarray
- A Box() of size 1, 3, 4, or 6 depending on the action type.
-
- """
- if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]:
- size = 4
- elif self.ACT_TYPE == ActionType.PID:
- size = 3
- elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]:
- size = 1
- else:
- print("[ERROR] in BaseSingleAgentAviary._actionSpace()")
- exit()
- return spaces.Box(low=-1*np.ones(size),
- # return spaces.Box(low=np.zeros(size), # Alternative action space, see PR #32
- high=np.ones(size),
- dtype=np.float32
- )
-
- ################################################################################
-
- def _preprocessAction(self,
- action
- ):
- """Pre-processes the action passed to `.step()` into motors' RPMs.
-
- Parameter `action` is processed differenly for each of the different
- action types: `action` can be of length 1, 3, 4, or 6 and represent
- RPMs, desired thrust and torques, the next target position to reach
- using PID control, a desired velocity vector, new PID coefficients, etc.
-
- Parameters
- ----------
- action : ndarray
- The input action for each drone, to be translated into RPMs.
-
- Returns
- -------
- ndarray
- (4,)-shaped array of ints containing to clipped RPMs
- commanded to the 4 motors of each drone.
-
- """
- if self.ACT_TYPE == ActionType.RPM:
- return np.array(self.HOVER_RPM * (1+0.05*action))
- elif self.ACT_TYPE == ActionType.PID:
- state = self._getDroneStateVector(0)
- next_pos = self._calculateNextStep(
- current_position=state[0:3],
- destination=action,
- step_size=1,
- )
- rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP,
- cur_pos=state[0:3],
- cur_quat=state[3:7],
- cur_vel=state[10:13],
- cur_ang_vel=state[13:16],
- target_pos=next_pos
- )
- return rpm
- elif self.ACT_TYPE == ActionType.VEL:
- state = self._getDroneStateVector(0)
- if np.linalg.norm(action[0:3]) != 0:
- v_unit_vector = action[0:3] / np.linalg.norm(action[0:3])
- else:
- v_unit_vector = np.zeros(3)
- rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP,
- cur_pos=state[0:3],
- cur_quat=state[3:7],
- cur_vel=state[10:13],
- cur_ang_vel=state[13:16],
- target_pos=state[0:3], # same as the current position
- target_rpy=np.array([0,0,state[9]]), # keep current yaw
- target_vel=self.SPEED_LIMIT * np.abs(action[3]) * v_unit_vector # target the desired velocity vector
- )
- return rpm
- elif self.ACT_TYPE == ActionType.ONE_D_RPM:
- return np.repeat(self.HOVER_RPM * (1+0.05*action), 4)
- elif self.ACT_TYPE == ActionType.ONE_D_PID:
- state = self._getDroneStateVector(0)
- rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP,
- cur_pos=state[0:3],
- cur_quat=state[3:7],
- cur_vel=state[10:13],
- cur_ang_vel=state[13:16],
- target_pos=state[0:3]+0.1*np.array([0,0,action[0]])
- )
- return rpm
- else:
- print("[ERROR] in BaseSingleAgentAviary._preprocessAction()")
-
- ################################################################################
-
- def _observationSpace(self):
- """Returns the observation space of the environment.
-
- Returns
- -------
- ndarray
- A Box() of shape (H,W,4) or (12,) depending on the observation type.
-
- """
- if self.OBS_TYPE == ObservationType.RGB:
- return spaces.Box(low=0,
- high=255,
- shape=(self.IMG_RES[1], self.IMG_RES[0], 4),
- dtype=np.uint8
- )
- elif self.OBS_TYPE == ObservationType.KIN:
- ############################################################
- #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS)
- #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3
- # obs_lower_bound = np.array([-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])
- # obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
- # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 )
- ############################################################
- #### OBS SPACE OF SIZE 12
- return spaces.Box(low=np.array([-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1]),
- high=np.array([1,1,1, 1,1,1, 1,1,1, 1,1,1]),
- dtype=np.float32
- )
- ############################################################
- else:
- print("[ERROR] in BaseSingleAgentAviary._observationSpace()")
-
- ################################################################################
-
- def _computeObs(self):
- """Returns the current observation of the environment.
-
- Returns
- -------
- ndarray
- A Box() of shape (H,W,4) or (12,) depending on the observation type.
-
- """
- if self.OBS_TYPE == ObservationType.RGB:
- if self.step_counter%self.IMG_CAPTURE_FREQ == 0:
- self.rgb[0], self.dep[0], self.seg[0] = self._getDroneImages(0,
- segmentation=False
- )
- #### Printing observation to PNG frames example ############
- if self.RECORD:
- self._exportImage(img_type=ImageType.RGB,
- img_input=self.rgb[0],
- path=self.ONBOARD_IMG_PATH,
- frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ)
- )
- return self.rgb[0]
- elif self.OBS_TYPE == ObservationType.KIN:
- obs = self._clipAndNormalizeState(self._getDroneStateVector(0))
- ############################################################
- #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS)
- # return obs
- ############################################################
- #### OBS SPACE OF SIZE 12
- ret = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,)
- return ret.astype('float32')
- ############################################################
- else:
- print("[ERROR] in BaseSingleAgentAviary._computeObs()")
-
- ################################################################################
-
- def _clipAndNormalizeState(self,
- state
- ):
- """Normalizes a drone's state to the [-1,1] range.
-
- Must be implemented in a subclass.
-
- Parameters
- ----------
- state : ndarray
- Array containing the non-normalized state of a single drone.
-
- """
- raise NotImplementedError
diff --git a/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py b/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py
deleted file mode 100644
index f1b072b20..000000000
--- a/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py
+++ /dev/null
@@ -1,240 +0,0 @@
-import os
-import numpy as np
-import pybullet as p
-import pkg_resources
-
-from gym_pybullet_drones.utils.enums import DroneModel, Physics
-from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary
-
-
-class FlyThruGateAviary(BaseSingleAgentAviary):
- """Single agent RL problem: fly through a gate."""
-
- ################################################################################
-
- def __init__(self,
- drone_model: DroneModel=DroneModel.CF2X,
- initial_xyzs=None,
- initial_rpys=None,
- physics: Physics=Physics.PYB,
- pyb_freq: int = 240,
- ctrl_freq: int = 240,
- gui=False,
- record=False,
- obs: ObservationType=ObservationType.KIN,
- act: ActionType=ActionType.RPM
- ):
- """Initialization of a single agent RL environment.
-
- Using the generic single agent RL superclass.
-
- Parameters
- ----------
- drone_model : DroneModel, optional
- The desired drone type (detailed in an .urdf file in folder `assets`).
- initial_xyzs: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones.
- initial_rpys: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians).
- physics : Physics, optional
- The desired implementation of PyBullet physics/custom dynamics.
- pyb_freq : int, optional
- The frequency at which PyBullet steps (a multiple of ctrl_freq).
- ctrl_freq : int, optional
- The frequency at which the environment steps.
- gui : bool, optional
- Whether to use PyBullet's GUI.
- record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
- obs : ObservationType, optional
- The type of observation space (kinematic information or vision)
- act : ActionType, optional
- The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control)
-
- """
- super().__init__(drone_model=drone_model,
- initial_xyzs=initial_xyzs,
- initial_rpys=initial_rpys,
- physics=physics,
- pyb_freq=pyb_freq,
- ctrl_freq=ctrl_freq,
- gui=gui,
- record=record,
- obs=obs,
- act=act
- )
-
- ################################################################################
-
- def _addObstacles(self):
- """Add obstacles to the environment.
-
- Extends the superclass method and add the gate build of cubes and an architrave.
-
- """
- super()._addObstacles()
- p.loadURDF(pkg_resources.resource_filename('gym_pybullet_drones', 'assets/architrave.urdf'),
- [0, -1, .55],
- p.getQuaternionFromEuler([0, 0, 0]),
- physicsClientId=self.CLIENT
- )
- for i in range(10):
- p.loadURDF("cube_small.urdf",
- [-.3, -1, .02+i*0.05],
- p.getQuaternionFromEuler([0, 0, 0]),
- physicsClientId=self.CLIENT
- )
- p.loadURDF("cube_small.urdf",
- [.3, -1, .02+i*0.05],
- p.getQuaternionFromEuler([0,0,0]),
- physicsClientId=self.CLIENT
- )
-
- ################################################################################
-
- def _computeReward(self):
- """Computes the current reward value.
-
- Returns
- -------
- float
- The reward.
-
- """
- state = self._getDroneStateVector(0)
- norm_ep_time = (self.step_counter/self.PYB_FREQ) / self.EPISODE_LEN_SEC
- return -10 * np.linalg.norm(np.array([0, -2*norm_ep_time, 0.75])-state[0:3])**2
-
- ################################################################################
-
- def _computeTerminated(self):
- """Computes the current done value.
-
- Returns
- -------
- bool
- Whether the current episode is done.
-
- """
- if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC:
- return True
- else:
- return False
-
- ################################################################################
-
- def _computeTruncated(self):
- """Computes the current truncated value(s).
-
- Unused in this implementation.
-
- Returns
- -------
- bool
- Always false.
-
- """
- return False
-
- ################################################################################
-
- def _computeInfo(self):
- """Computes the current info dict(s).
-
- Unused.
-
- Returns
- -------
- dict[str, int]
- Dummy value.
-
- """
- return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years
-
- ################################################################################
-
- def _clipAndNormalizeState(self,
- state
- ):
- """Normalizes a drone's state to the [-1,1] range.
-
- Parameters
- ----------
- state : ndarray
- (20,)-shaped array of floats containing the non-normalized state of a single drone.
-
- Returns
- -------
- ndarray
- (20,)-shaped array of floats containing the normalized state of a single drone.
-
- """
- MAX_LIN_VEL_XY = 3
- MAX_LIN_VEL_Z = 1
-
- MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC
- MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC
-
- MAX_PITCH_ROLL = np.pi # Full range
-
- clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY)
- clipped_pos_z = np.clip(state[2], 0, MAX_Z)
- clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL)
- clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY)
- clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z)
-
- if self.GUI:
- self._clipAndNormalizeStateWarning(state,
- clipped_pos_xy,
- clipped_pos_z,
- clipped_rp,
- clipped_vel_xy,
- clipped_vel_z
- )
-
- normalized_pos_xy = clipped_pos_xy / MAX_XY
- normalized_pos_z = clipped_pos_z / MAX_Z
- normalized_rp = clipped_rp / MAX_PITCH_ROLL
- normalized_y = state[9] / np.pi # No reason to clip
- normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY
- normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY
- normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16]
-
- norm_and_clipped = np.hstack([normalized_pos_xy,
- normalized_pos_z,
- state[3:7],
- normalized_rp,
- normalized_y,
- normalized_vel_xy,
- normalized_vel_z,
- normalized_ang_vel,
- state[16:20]
- ]).reshape(20,)
-
- return norm_and_clipped
-
- ################################################################################
-
- def _clipAndNormalizeStateWarning(self,
- state,
- clipped_pos_xy,
- clipped_pos_z,
- clipped_rp,
- clipped_vel_xy,
- clipped_vel_z,
- ):
- """Debugging printouts associated to `_clipAndNormalizeState`.
-
- Print a warning if values in a state vector is out of the clipping range.
-
- """
- if not(clipped_pos_xy == np.array(state[0:2])).all():
- print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1]))
- if not(clipped_pos_z == np.array(state[2])).all():
- print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2]))
- if not(clipped_rp == np.array(state[7:9])).all():
- print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8]))
- if not(clipped_vel_xy == np.array(state[10:12])).all():
- print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11]))
- if not(clipped_vel_z == np.array(state[12])).all():
- print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12]))
diff --git a/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py b/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py
deleted file mode 100644
index 96fd10c2a..000000000
--- a/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py
+++ /dev/null
@@ -1,209 +0,0 @@
-import numpy as np
-
-from gym_pybullet_drones.utils.enums import DroneModel, Physics
-from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary
-
-class HoverAviary(BaseSingleAgentAviary):
- """Single agent RL problem: hover at position."""
-
- ################################################################################
-
- def __init__(self,
- drone_model: DroneModel=DroneModel.CF2X,
- initial_xyzs=None,
- initial_rpys=None,
- physics: Physics=Physics.PYB,
- pyb_freq: int = 240,
- ctrl_freq: int = 240,
- gui=False,
- record=False,
- obs: ObservationType=ObservationType.KIN,
- act: ActionType=ActionType.RPM
- ):
- """Initialization of a single agent RL environment.
-
- Using the generic single agent RL superclass.
-
- Parameters
- ----------
- drone_model : DroneModel, optional
- The desired drone type (detailed in an .urdf file in folder `assets`).
- initial_xyzs: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones.
- initial_rpys: ndarray | None, optional
- (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians).
- physics : Physics, optional
- The desired implementation of PyBullet physics/custom dynamics.
- pyb_freq : int, optional
- The frequency at which PyBullet steps (a multiple of ctrl_freq).
- ctrl_freq : int, optional
- The frequency at which the environment steps.
- gui : bool, optional
- Whether to use PyBullet's GUI.
- record : bool, optional
- Whether to save a video of the simulation in folder `files/videos/`.
- obs : ObservationType, optional
- The type of observation space (kinematic information or vision)
- act : ActionType, optional
- The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control)
-
- """
- super().__init__(drone_model=drone_model,
- initial_xyzs=initial_xyzs,
- initial_rpys=initial_rpys,
- physics=physics,
- pyb_freq=pyb_freq,
- ctrl_freq=ctrl_freq,
- gui=gui,
- record=record,
- obs=obs,
- act=act
- )
-
- ################################################################################
-
- def _computeReward(self):
- """Computes the current reward value.
-
- Returns
- -------
- float
- The reward.
-
- """
- state = self._getDroneStateVector(0)
- return -1 * np.linalg.norm(np.array([0, 0, 1])-state[0:3])**2
-
- ################################################################################
-
- def _computeTerminated(self):
- """Computes the current done value.
-
- Returns
- -------
- bool
- Whether the current episode is done.
-
- """
- if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC:
- return True
- else:
- return False
-
- ################################################################################
-
- def _computeTruncated(self):
- """Computes the current truncated value(s).
-
- Unused in this implementation.
-
- Returns
- -------
- bool
- Always false.
-
- """
- return False
-
- ################################################################################
-
- def _computeInfo(self):
- """Computes the current info dict(s).
-
- Unused.
-
- Returns
- -------
- dict[str, int]
- Dummy value.
-
- """
- return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years
-
- ################################################################################
-
- def _clipAndNormalizeState(self,
- state
- ):
- """Normalizes a drone's state to the [-1,1] range.
-
- Parameters
- ----------
- state : ndarray
- (20,)-shaped array of floats containing the non-normalized state of a single drone.
-
- Returns
- -------
- ndarray
- (20,)-shaped array of floats containing the normalized state of a single drone.
-
- """
- MAX_LIN_VEL_XY = 3
- MAX_LIN_VEL_Z = 1
-
- MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC
- MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC
-
- MAX_PITCH_ROLL = np.pi # Full range
-
- clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY)
- clipped_pos_z = np.clip(state[2], 0, MAX_Z)
- clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL)
- clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY)
- clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z)
-
- if self.GUI:
- self._clipAndNormalizeStateWarning(state,
- clipped_pos_xy,
- clipped_pos_z,
- clipped_rp,
- clipped_vel_xy,
- clipped_vel_z
- )
-
- normalized_pos_xy = clipped_pos_xy / MAX_XY
- normalized_pos_z = clipped_pos_z / MAX_Z
- normalized_rp = clipped_rp / MAX_PITCH_ROLL
- normalized_y = state[9] / np.pi # No reason to clip
- normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY
- normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY
- normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16]
-
- norm_and_clipped = np.hstack([normalized_pos_xy,
- normalized_pos_z,
- state[3:7],
- normalized_rp,
- normalized_y,
- normalized_vel_xy,
- normalized_vel_z,
- normalized_ang_vel,
- state[16:20]
- ]).reshape(20,)
-
- return norm_and_clipped
-
- ################################################################################
-
- def _clipAndNormalizeStateWarning(self,
- state,
- clipped_pos_xy,
- clipped_pos_z,
- clipped_rp,
- clipped_vel_xy,
- clipped_vel_z,
- ):
- """Debugging printouts associated to `_clipAndNormalizeState`.
-
- Print a warning if values in a state vector is out of the clipping range.
-
- """
- if not(clipped_pos_xy == np.array(state[0:2])).all():
- print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1]))
- if not(clipped_pos_z == np.array(state[2])).all():
- print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2]))
- if not(clipped_rp == np.array(state[7:9])).all():
- print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8]))
- if not(clipped_vel_xy == np.array(state[10:12])).all():
- print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11]))
- if not(clipped_vel_z == np.array(state[12])).all():
- print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12]))
diff --git a/gym_pybullet_drones/envs/single_agent_rl/__init__.py b/gym_pybullet_drones/envs/single_agent_rl/__init__.py
deleted file mode 100644
index b50f296b4..000000000
--- a/gym_pybullet_drones/envs/single_agent_rl/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import BaseSingleAgentAviary
-from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary
-from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary
diff --git a/gym_pybullet_drones/examples/beta.py b/gym_pybullet_drones/examples/beta.py
index f7da8dd5e..7f47a4841 100644
--- a/gym_pybullet_drones/examples/beta.py
+++ b/gym_pybullet_drones/examples/beta.py
@@ -88,7 +88,7 @@ def run(
)
#### Run the simulation ####################################
- with open("../assets/beta.csv", mode='r') as csv_file:
+ with open("../assets/beta-traj.csv", mode='r') as csv_file:
csv_reader = csv.DictReader(csv_file)
trajectory1 = iter([{
"pos": np.array([
diff --git a/gym_pybullet_drones/examples/learn.py b/gym_pybullet_drones/examples/learn.py
index 81ebf9e90..63809ab73 100644
--- a/gym_pybullet_drones/examples/learn.py
+++ b/gym_pybullet_drones/examples/learn.py
@@ -1,370 +1,193 @@
-"""Script demonstrating the use of `gym_pybullet_drones`' Gymnasium interface.
+"""Script demonstrating the use of `gym_pybullet_drones`'s Gymnasium interface.
-Class HoverAviary is used as a learning env for the PPO algorithm.
+Classes HoverAviary and MultiHoverAviary are used as learning envs for the PPO algorithm.
Example
-------
In a terminal, run as:
- $ python learn.py
+ $ python learn.py --multiagent false
+ $ python learn.py --multiagent true
Notes
-----
This is a minimal working example integrating `gym-pybullet-drones` with
-reinforcement learning libraries `stable-baselines3`.
+reinforcement learning library `stable-baselines3`.
"""
+import os
import time
+from datetime import datetime
import argparse
import gymnasium as gym
import numpy as np
+import torch
from stable_baselines3 import PPO
+from stable_baselines3.common.env_util import make_vec_env
+from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
+from stable_baselines3.common.evaluation import evaluate_policy
from gym_pybullet_drones.utils.Logger import Logger
-from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary
+from gym_pybullet_drones.envs.HoverAviary import HoverAviary
+from gym_pybullet_drones.envs.MultiHoverAviary import MultiHoverAviary
from gym_pybullet_drones.utils.utils import sync, str2bool
+from gym_pybullet_drones.utils.enums import ObservationType, ActionType
DEFAULT_GUI = True
DEFAULT_RECORD_VIDEO = False
DEFAULT_OUTPUT_FOLDER = 'results'
DEFAULT_COLAB = False
-def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB, record_video=DEFAULT_RECORD_VIDEO):
+DEFAULT_OBS = ObservationType('kin') # 'kin' or 'rgb'
+DEFAULT_ACT = ActionType('one_d_rpm') # 'rpm' or 'pid' or 'vel' or 'one_d_rpm' or 'one_d_pid'
+DEFAULT_AGENTS = 2
+DEFAULT_MA = False
+
+def run(multiagent=DEFAULT_MA, output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB, record_video=DEFAULT_RECORD_VIDEO, local=True):
+
+ filename = os.path.join(output_folder, 'save-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S"))
+ if not os.path.exists(filename):
+ os.makedirs(filename+'/')
+
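+    #### Vectorized training env and separate (non-vector) evaluation env ####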
+ if not multiagent:
+ train_env = make_vec_env(HoverAviary,
+ env_kwargs=dict(obs=DEFAULT_OBS, act=DEFAULT_ACT),
+ n_envs=1,
+ seed=0
+ )
+ eval_env = HoverAviary(obs=DEFAULT_OBS, act=DEFAULT_ACT)
+ else:
+ train_env = make_vec_env(MultiHoverAviary,
+ env_kwargs=dict(num_drones=DEFAULT_AGENTS, obs=DEFAULT_OBS, act=DEFAULT_ACT),
+ n_envs=1,
+ seed=0
+ )
+ eval_env = MultiHoverAviary(num_drones=DEFAULT_AGENTS, obs=DEFAULT_OBS, act=DEFAULT_ACT)
#### Check the environment's spaces ########################
- env = gym.make("hover-aviary-v0")
- print("[INFO] Action space:", env.action_space)
- print("[INFO] Observation space:", env.observation_space)
+ print('[INFO] Action space:', train_env.action_space)
+ print('[INFO] Observation space:', train_env.observation_space)
#### Train the model #######################################
- model = PPO("MlpPolicy",
- env,
- verbose=1
- )
- model.learn(total_timesteps=10000) # Typically not enough
+ model = PPO('MlpPolicy',
+ train_env,
+ # policy_kwargs=dict(activation_fn=torch.nn.ReLU, net_arch=[512, 512, dict(vf=[256, 128], pi=[256, 128])]),
+ # tensorboard_log=filename+'/tb/',
+ verbose=1)
+
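+    # reward_threshold=np.inf means the callback below never stops training early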
+ callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=np.inf,
+ verbose=1)
+ eval_callback = EvalCallback(eval_env,
+ callback_on_new_best=callback_on_best,
+ verbose=1,
+ best_model_save_path=filename+'/',
+ log_path=filename+'/',
+ eval_freq=int(2000),
+ deterministic=True,
+ render=False)
+ model.learn(total_timesteps=3*int(1e5) if local else int(1e2), # shorter training in GitHub Actions pytest
+ callback=eval_callback,
+ log_interval=100)
+
+ #### Save the model ########################################
+ model.save(filename+'/success_model.zip')
+ print(filename)
+
+ #### Print training progression ############################
+ with np.load(filename+'/evaluations.npz') as data:
+ for j in range(data['timesteps'].shape[0]):
+ print(str(data['timesteps'][j])+","+str(data['results'][j][0]))
+
+ ############################################################
+ ############################################################
+ ############################################################
+ ############################################################
+ ############################################################
+
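+    #### Load the saved model (final save, else best EvalCallback checkpoint) #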
+ if os.path.isfile(filename+'/success_model.zip'):
+ path = filename+'/success_model.zip'
+ elif os.path.isfile(filename+'/best_model.zip'):
+ path = filename+'/best_model.zip'
+ else:
+ print("[ERROR]: no model under the specified path", filename)
+ model = PPO.load(path)
#### Show (and record a video of) the model's performance ##
- env = HoverAviary(gui=gui,
- record=record_video
- )
- logger = Logger(logging_freq_hz=int(env.CTRL_FREQ),
- num_drones=1,
- output_folder=output_folder,
- colab=colab
- )
- obs, info = env.reset(seed=42, options={})
+ if not multiagent:
+ test_env = HoverAviary(gui=gui,
+ obs=DEFAULT_OBS,
+ act=DEFAULT_ACT,
+ record=record_video)
+ test_env_nogui = HoverAviary(obs=DEFAULT_OBS, act=DEFAULT_ACT)
+ else:
+ test_env = MultiHoverAviary(gui=gui,
+ num_drones=DEFAULT_AGENTS,
+ obs=DEFAULT_OBS,
+ act=DEFAULT_ACT,
+ record=record_video)
+ test_env_nogui = MultiHoverAviary(num_drones=DEFAULT_AGENTS, obs=DEFAULT_OBS, act=DEFAULT_ACT)
+ logger = Logger(logging_freq_hz=int(test_env.CTRL_FREQ),
+ num_drones=DEFAULT_AGENTS if multiagent else 1,
+ output_folder=output_folder,
+ colab=colab
+ )
+
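+    #### Evaluate the reloaded policy over 10 episodes without the GUI #######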
+ mean_reward, std_reward = evaluate_policy(model,
+ test_env_nogui,
+ n_eval_episodes=10
+ )
+ print("\n\n\nMean reward ", mean_reward, " +- ", std_reward, "\n\n")
+
+ obs, info = test_env.reset(seed=42, options={})
start = time.time()
- for i in range(3*env.CTRL_FREQ):
+ for i in range((test_env.EPISODE_LEN_SEC+2)*test_env.CTRL_FREQ):
action, _states = model.predict(obs,
deterministic=True
)
- obs, reward, terminated, truncated, info = env.step(action)
- logger.log(drone=0,
- timestamp=i/env.CTRL_FREQ,
- state=np.hstack([obs[0:3], np.zeros(4), obs[3:15], np.resize(action, (4))]),
- control=np.zeros(12)
- )
- env.render()
+ obs, reward, terminated, truncated, info = test_env.step(action)
+ obs2 = obs.squeeze()
+ act2 = action.squeeze()
+ print("Obs:", obs, "\tAction", action, "\tReward:", reward, "\tTerminated:", terminated, "\tTruncated:", truncated)
+ if DEFAULT_OBS == ObservationType.KIN:
+ if not multiagent:
+ logger.log(drone=0,
+ timestamp=i/test_env.CTRL_FREQ,
+ state=np.hstack([obs2[0:3],
+ np.zeros(4),
+ obs2[3:15],
+ act2
+ ]),
+ control=np.zeros(12)
+ )
+ else:
+ for d in range(DEFAULT_AGENTS):
+ logger.log(drone=d,
+ timestamp=i/test_env.CTRL_FREQ,
+ state=np.hstack([obs2[d][0:3],
+ np.zeros(4),
+ obs2[d][3:15],
+ act2[d]
+ ]),
+ control=np.zeros(12)
+ )
+ test_env.render()
print(terminated)
- sync(i, start, env.CTRL_TIMESTEP)
+ sync(i, start, test_env.CTRL_TIMESTEP)
if terminated:
- obs = env.reset(seed=42, options={})
- env.close()
+ obs = test_env.reset(seed=42, options={})
+ test_env.close()
- if plot:
+ if plot and DEFAULT_OBS == ObservationType.KIN:
logger.plot()
-if __name__ == "__main__":
+if __name__ == '__main__':
#### Define and parse (optional) arguments for the script ##
- parser = argparse.ArgumentParser(description='Single agent reinforcement learning example script using HoverAviary')
- parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='')
- parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: False)', metavar='')
+    parser = argparse.ArgumentParser(description='Single and multi-agent reinforcement learning example script')
+    parser.add_argument('--multiagent', default=DEFAULT_MA, type=str2bool, help='Whether to use MultiHoverAviary instead of HoverAviary (default: False)', metavar='')
+ parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='')
+ parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: False)', metavar='')
parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
- parser.add_argument('--colab', default=DEFAULT_COLAB, type=bool, help='Whether example is being run by a notebook (default: "False")', metavar='')
+    parser.add_argument('--colab', default=DEFAULT_COLAB, type=str2bool, help='Whether the example is being run within a notebook (default: False)', metavar='')
ARGS = parser.parse_args()
run(**vars(ARGS))
-
-# """Learning script for single agent problems.
-
-# Agents are based on `stable_baselines3`'s implementation of A2C, PPO SAC, TD3, DDPG.
-
-# Example
-# -------
-# To run the script, type in a terminal:
-
-# $ python singleagent.py --env --algo --obs --act --cpu
-
-# Notes
-# -----
-# Use:
-
-# $ tensorboard --logdir ./results/save-----/tb/
-
-# To check the tensorboard results at:
-
-# http://localhost:6006/
-
-# """
-# import os
-# import time
-# from datetime import datetime
-# from sys import platform
-# import argparse
-# import subprocess
-# import numpy as np
-# import gymnasium as gym
-# import torch
-# from stable_baselines3.common.env_checker import check_env
-# from stable_baselines3.common.cmd_util import make_vec_env # Module cmd_util will be renamed to env_util https://github.com/DLR-RM/stable-baselines3/pull/197
-# from stable_baselines3.common.vec_env import SubprocVecEnv, VecTransposeImage
-# from stable_baselines3.common.utils import set_random_seed
-# from stable_baselines3 import A2C
-# from stable_baselines3 import PPO
-# from stable_baselines3 import SAC
-# from stable_baselines3 import TD3
-# from stable_baselines3 import DDPG
-# from stable_baselines3.common.policies import ActorCriticPolicy as a2cppoMlpPolicy
-# from stable_baselines3.common.policies import ActorCriticCnnPolicy as a2cppoCnnPolicy
-# from stable_baselines3.sac.policies import SACPolicy as sacMlpPolicy
-# from stable_baselines3.sac import CnnPolicy as sacCnnPolicy
-# from stable_baselines3.td3 import MlpPolicy as td3ddpgMlpPolicy
-# from stable_baselines3.td3 import CnnPolicy as td3ddpgCnnPolicy
-# from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback, StopTrainingOnRewardThreshold
-
-# from gym_pybullet_drones.envs.single_agent_rl.TakeoffAviary import TakeoffAviary
-# from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary
-# from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary
-# from gym_pybullet_drones.envs.single_agent_rl.TuneAviary import TuneAviary
-# from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType
-
-# import shared_constants
-
-# EPISODE_REWARD_THRESHOLD = -0 # Upperbound: rewards are always negative, but non-zero
-# """float: Reward threshold to halt the script."""
-
-# DEFAULT_ENV = 'hover'
-# DEFAULT_ALGO = 'ppo'
-# DEFAULT_OBS = ObservationType('kin')
-# DEFAULT_ACT = ActionType('one_d_rpm')
-# DEFAULT_CPU = 1
-# DEFAULT_STEPS = 35000
-# DEFAULT_OUTPUT_FOLDER = 'results'
-
-# def run(
-# env=DEFAULT_ENV,
-# algo=DEFAULT_ALGO,
-# obs=DEFAULT_OBS,
-# act=DEFAULT_ACT,
-# cpu=DEFAULT_CPU,
-# steps=DEFAULT_STEPS,
-# output_folder=DEFAULT_OUTPUT_FOLDER
-# ):
-
-# #### Save directory ########################################
-# filename = os.path.join(output_folder, 'save-'+env+'-'+algo+'-'+obs.value+'-'+act.value+'-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S"))
-# if not os.path.exists(filename):
-# os.makedirs(filename+'/')
-
-# #### Print out current git commit hash #####################
-# if (platform == "linux" or platform == "darwin") and ('GITHUB_ACTIONS' not in os.environ.keys()):
-# git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip()
-# with open(filename+'/git_commit.txt', 'w+') as f:
-# f.write(str(git_commit))
-
-# #### Warning ###############################################
-# if env == 'tune' and act != ActionType.TUN:
-# print("\n\n\n[WARNING] TuneAviary is intended for use with ActionType.TUN\n\n\n")
-# if act == ActionType.ONE_D_RPM or act == ActionType.ONE_D_DYN or act == ActionType.ONE_D_PID:
-# print("\n\n\n[WARNING] Simplified 1D problem for debugging purposes\n\n\n")
-# #### Errors ################################################
-# if not env in ['takeoff', 'hover']:
-# print("[ERROR] 1D action space is only compatible with Takeoff and HoverAviary")
-# exit()
-# if act == ActionType.TUN and env != 'tune' :
-# print("[ERROR] ActionType.TUN is only compatible with TuneAviary")
-# exit()
-# if algo in ['sac', 'td3', 'ddpg'] and cpu!=1:
-# print("[ERROR] The selected algorithm does not support multiple environments")
-# exit()
-
-# #### Uncomment to debug slurm scripts ######################
-# # exit()
-
-# env_name = env+"-aviary-v0"
-# sa_env_kwargs = dict(aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act)
-# # train_env = gym.make(env_name, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act) # single environment instead of a vectorized one
-# if env_name == "takeoff-aviary-v0":
-# train_env = make_vec_env(TakeoffAviary,
-# env_kwargs=sa_env_kwargs,
-# n_envs=cpu,
-# seed=0
-# )
-# if env_name == "hover-aviary-v0":
-# train_env = make_vec_env(HoverAviary,
-# env_kwargs=sa_env_kwargs,
-# n_envs=cpu,
-# seed=0
-# )
-# if env_name == "flythrugate-aviary-v0":
-# train_env = make_vec_env(FlyThruGateAviary,
-# env_kwargs=sa_env_kwargs,
-# n_envs=cpu,
-# seed=0
-# )
-# if env_name == "tune-aviary-v0":
-# train_env = make_vec_env(TuneAviary,
-# env_kwargs=sa_env_kwargs,
-# n_envs=cpu,
-# seed=0
-# )
-# print("[INFO] Action space:", train_env.action_space)
-# print("[INFO] Observation space:", train_env.observation_space)
-# # check_env(train_env, warn=True, skip_render_check=True)
-
-# #### On-policy algorithms ##################################
-# onpolicy_kwargs = dict(activation_fn=torch.nn.ReLU,
-# net_arch=[512, 512, dict(vf=[256, 128], pi=[256, 128])]
-# ) # or None
-# if algo == 'a2c':
-# model = A2C(a2cppoMlpPolicy,
-# train_env,
-# policy_kwargs=onpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# ) if obs == ObservationType.KIN else A2C(a2cppoCnnPolicy,
-# train_env,
-# policy_kwargs=onpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# )
-# if algo == 'ppo':
-# model = PPO(a2cppoMlpPolicy,
-# train_env,
-# policy_kwargs=onpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# ) if obs == ObservationType.KIN else PPO(a2cppoCnnPolicy,
-# train_env,
-# policy_kwargs=onpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# )
-
-# #### Off-policy algorithms #################################
-# offpolicy_kwargs = dict(activation_fn=torch.nn.ReLU,
-# net_arch=[512, 512, 256, 128]
-# ) # or None # or dict(net_arch=dict(qf=[256, 128, 64, 32], pi=[256, 128, 64, 32]))
-# if algo == 'sac':
-# model = SAC(sacMlpPolicy,
-# train_env,
-# policy_kwargs=offpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# ) if obs==ObservationType.KIN else SAC(sacCnnPolicy,
-# train_env,
-# policy_kwargs=offpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# )
-# if algo == 'td3':
-# model = TD3(td3ddpgMlpPolicy,
-# train_env,
-# policy_kwargs=offpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# ) if obs==ObservationType.KIN else TD3(td3ddpgCnnPolicy,
-# train_env,
-# policy_kwargs=offpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# )
-# if algo == 'ddpg':
-# model = DDPG(td3ddpgMlpPolicy,
-# train_env,
-# policy_kwargs=offpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# ) if obs==ObservationType.KIN else DDPG(td3ddpgCnnPolicy,
-# train_env,
-# policy_kwargs=offpolicy_kwargs,
-# tensorboard_log=filename+'/tb/',
-# verbose=1
-# )
-
-# #### Create eveluation environment #########################
-# if obs == ObservationType.KIN:
-# eval_env = gym.make(env_name,
-# aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
-# obs=obs,
-# act=act
-# )
-# elif obs == ObservationType.RGB:
-# if env_name == "takeoff-aviary-v0":
-# eval_env = make_vec_env(TakeoffAviary,
-# env_kwargs=sa_env_kwargs,
-# n_envs=1,
-# seed=0
-# )
-# if env_name == "hover-aviary-v0":
-# eval_env = make_vec_env(HoverAviary,
-# env_kwargs=sa_env_kwargs,
-# n_envs=1,
-# seed=0
-# )
-# if env_name == "flythrugate-aviary-v0":
-# eval_env = make_vec_env(FlyThruGateAviary,
-# env_kwargs=sa_env_kwargs,
-# n_envs=1,
-# seed=0
-# )
-# if env_name == "tune-aviary-v0":
-# eval_env = make_vec_env(TuneAviary,
-# env_kwargs=sa_env_kwargs,
-# n_envs=1,
-# seed=0
-# )
-# eval_env = VecTransposeImage(eval_env)
-
-# #### Train the model #######################################
-# # checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=filename+'-logs/', name_prefix='rl_model')
-# callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=EPISODE_REWARD_THRESHOLD,
-# verbose=1
-# )
-# eval_callback = EvalCallback(eval_env,
-# callback_on_new_best=callback_on_best,
-# verbose=1,
-# best_model_save_path=filename+'/',
-# log_path=filename+'/',
-# eval_freq=int(2000/cpu),
-# deterministic=True,
-# render=False
-# )
-# model.learn(total_timesteps=steps, #int(1e12),
-# callback=eval_callback,
-# log_interval=100,
-# )
-
-# #### Save the model ########################################
-# model.save(filename+'/success_model.zip')
-# print(filename)
-
-# #### Print training progression ############################
-# with np.load(filename+'/evaluations.npz') as data:
-# for j in range(data['timesteps'].shape[0]):
-# print(str(data['timesteps'][j])+","+str(data['results'][j][0]))
-
-
-# if __name__ == "__main__":
-# #### Define and parse (optional) arguments for the script ##
-# parser = argparse.ArgumentParser(description='Single agent reinforcement learning experiments script')
-# parser.add_argument('--env', default=DEFAULT_ENV, type=str, choices=['takeoff', 'hover', 'flythrugate', 'tune'], help='Task (default: hover)', metavar='')
-# parser.add_argument('--algo', default=DEFAULT_ALGO, type=str, choices=['a2c', 'ppo', 'sac', 'td3', 'ddpg'], help='RL agent (default: ppo)', metavar='')
-# parser.add_argument('--obs', default=DEFAULT_OBS, type=ObservationType, help='Observation space (default: kin)', metavar='')
-# parser.add_argument('--act', default=DEFAULT_ACT, type=ActionType, help='Action space (default: one_d_rpm)', metavar='')
-# parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='')
-# parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='')
-# parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
-# ARGS = parser.parse_args()
-
-# run(**vars(ARGS))
\ No newline at end of file
diff --git a/pypi_description.md b/pypi_description.md
index 31acb7dcc..c60fe2e9f 100644
--- a/pypi_description.md
+++ b/pypi_description.md
@@ -1,6 +1,6 @@
# gym-pybullet-drones
-[Simple](https://en.wikipedia.org/wiki/KISS_principle) OpenAI [Gym environment](https://gym.openai.com/envs/#classic_control) based on [PyBullet](https://github.com/bulletphysics/bullet3) for multi-agent reinforcement learning with quadrotors
+[Simple](https://en.wikipedia.org/wiki/KISS_principle) OpenAI [Gym environment](https://gym.openai.com/envs/#classic_control) based on [PyBullet](https://github.com/bulletphysics/bullet3) for multi-agent reinforcement learning with quadrotors
Find the project's published results at [Learning to Fly—a Gym Environment with PyBullet Physics for Reinforcement Learning of Multi-agent Quadcopter Control](https://ieeexplore.ieee.org/abstract/document/9635857).
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 8df9b1ca0..3a04dd88e 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -12,4 +12,4 @@ def test_downwash():
def test_learn():
from gym_pybullet_drones.examples.learn import run
- run(gui=False, plot=False, output_folder='tmp')
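+    # local=False shortens learn.py's training loop so the test stays fast in CI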
+ run(gui=False, plot=False, output_folder='tmp', local=False)