From dd95218674cd27ae1ab63e8add0392eb1a878146 Mon Sep 17 00:00:00 2001 From: Jacopo Panerati Date: Sun, 15 Oct 2023 17:37:02 +0400 Subject: [PATCH] Simplified RL aviaries --- README.md | 5 +- gym_pybullet_drones/__init__.py | 14 +- ...aseMultiagentAviary.py => BaseRLAviary.py} | 34 +- .../envs/{single_agent_rl => }/HoverAviary.py | 7 +- .../LeaderFollowerAviary.py | 21 +- gym_pybullet_drones/envs/RLAviary.py | 2 - gym_pybullet_drones/envs/__init__.py | 3 + .../envs/multi_agent_rl/FlockAviary.py | 265 --------------- .../envs/multi_agent_rl/__init__.py | 3 - .../single_agent_rl/BaseSingleAgentAviary.py | 305 ------------------ .../envs/single_agent_rl/FlyThruGateAviary.py | 240 -------------- .../envs/single_agent_rl/__init__.py | 3 - gym_pybullet_drones/examples/learn.py | 37 ++- 13 files changed, 59 insertions(+), 880 deletions(-) rename gym_pybullet_drones/envs/{multi_agent_rl/BaseMultiagentAviary.py => BaseRLAviary.py} (92%) rename gym_pybullet_drones/envs/{single_agent_rl => }/HoverAviary.py (97%) rename gym_pybullet_drones/envs/{multi_agent_rl => }/LeaderFollowerAviary.py (93%) delete mode 100644 gym_pybullet_drones/envs/RLAviary.py delete mode 100644 gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py delete mode 100644 gym_pybullet_drones/envs/multi_agent_rl/__init__.py delete mode 100644 gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py delete mode 100644 gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py delete mode 100644 gym_pybullet_drones/envs/single_agent_rl/__init__.py diff --git a/README.md b/README.md index c3829e2a9..0dbd807ed 100644 --- a/README.md +++ b/README.md @@ -99,10 +99,11 @@ If you wish, please cite our [IROS 2021 paper](https://arxiv.org/abs/2103.02142) ## TODO - [ ] Add `crazyflie-firmware` SITL support @spencerteetaert + +## DESIDERATA + - [ ] Add motor delay @JacopoPan / @spencerteetaert - [ ] Replace `rpy` with quaternions (and `ang_vel` with body rates) in `obs` @JacopoPan -- [ ] Replace `BaseSingleAgentAviary` and `BaseMultiAgentAviary` with a single `RLAviary`, incl. 
PR #161 @JacopoPan -- [ ] Add a multi-agent MDP with 2-drone chase through a gate @JacopoPan ----- > University of Toronto's [Dynamic Systems Lab](https://github.com/utiasDSL) / [Vector Institute](https://github.com/VectorInstitute) / University of Cambridge's [Prorok Lab](https://github.com/proroklab) diff --git a/gym_pybullet_drones/__init__.py b/gym_pybullet_drones/__init__.py index 365cbfaab..09b80fc51 100644 --- a/gym_pybullet_drones/__init__.py +++ b/gym_pybullet_drones/__init__.py @@ -12,20 +12,10 @@ register( id='hover-aviary-v0', - entry_point='gym_pybullet_drones.envs.single_agent_rl:HoverAviary', -) - -register( - id='flythrugate-aviary-v0', - entry_point='gym_pybullet_drones.envs.single_agent_rl:FlyThruGateAviary', -) - -register( - id='flock-aviary-v0', - entry_point='gym_pybullet_drones.envs.multi_agent_rl:FlockAviary', + entry_point='gym_pybullet_drones.envs:HoverAviary', ) register( id='leaderfollower-aviary-v0', - entry_point='gym_pybullet_drones.envs.multi_agent_rl:LeaderFollowerAviary', + entry_point='gym_pybullet_drones.envs:LeaderFollowerAviary', ) diff --git a/gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py b/gym_pybullet_drones/envs/BaseRLAviary.py similarity index 92% rename from gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py rename to gym_pybullet_drones/envs/BaseRLAviary.py index 40415449a..865baed20 100644 --- a/gym_pybullet_drones/envs/multi_agent_rl/BaseMultiagentAviary.py +++ b/gym_pybullet_drones/envs/BaseRLAviary.py @@ -7,14 +7,14 @@ from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType, ImageType from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl -class BaseMultiagentAviary(BaseAviary): - """Base multi-agent environment class for reinforcement learning.""" +class BaseRLAviary(BaseAviary): + """Base single and multi-agent environment class for reinforcement learning.""" ################################################################################ def __init__(self, drone_model: DroneModel=DroneModel.CF2X, - num_drones: int=2, + num_drones: int=1, neighbourhood_radius: float=np.inf, initial_xyzs=None, initial_rpys=None, @@ -26,7 +26,7 @@ def __init__(self, obs: ObservationType=ObservationType.KIN, act: ActionType=ActionType.RPM ): - """Initialization of a generic multi-agent RL environment. + """Initialization of a generic single and multi-agent RL environment. Attributes `vision_attributes` and `dynamics_attributes` are selected based on the choice of `obs` and `act`; `obstacles` is set to True @@ -61,9 +61,6 @@ def __init__(self, The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.) 
""" - if num_drones < 2: - print("[ERROR] in BaseMultiagentAviary.__init__(), num_drones should be >= 2") - exit() vision_attributes = True if obs == ObservationType.RGB else False self.OBS_TYPE = obs self.ACT_TYPE = act @@ -74,7 +71,7 @@ def __init__(self, if drone_model in [DroneModel.CF2X, DroneModel.CF2P]: self.ctrl = [DSLPIDControl(drone_model=DroneModel.CF2X) for i in range(num_drones)] else: - print("[ERROR] in BaseMultiagentAviary.__init()__, no controller is available for the specified drone_model") + print("[ERROR] in BaseRLAviary.__init()__, no controller is available for the specified drone_model") super().__init__(drone_model=drone_model, num_drones=num_drones, neighbourhood_radius=neighbourhood_radius, @@ -144,7 +141,7 @@ def _actionSpace(self): elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]: size = 1 else: - print("[ERROR] in BaseMultiagentAviary._actionSpace()") + print("[ERROR] in BaseRLAviary._actionSpace()") exit() act_lower_bound = np.array([-1*np.ones(size) for i in range(self.NUM_DRONES)]) act_upper_bound = np.array([+1*np.ones(size) for i in range(self.NUM_DRONES)]) @@ -228,7 +225,7 @@ def _preprocessAction(self, ) rpm[k,:] = rpm else: - print("[ERROR] in BaseMultiagentAviary._preprocessAction()") + print("[ERROR] in BaseRLAviary._preprocessAction()") exit() return rpm @@ -244,10 +241,6 @@ def _observationSpace(self): """ if self.OBS_TYPE == ObservationType.RGB: - # return spaces.Dict({i: spaces.Box(low=0, - # high=255, - # shape=(self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8 - # ) for i in range(self.NUM_DRONES)}) return spaces.Box(low=0, high=255, shape=(self.NUM_DRONES, self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8) @@ -259,13 +252,14 @@ def _observationSpace(self): # obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 ) ############################################################ - #### OBS SPACE OF SIZE 12 + #### OBS SPACE OF SIZE 12, W/O RPMS + #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ obs_lower_bound = np.array([[-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1] for i in range(self.NUM_DRONES)]) obs_upper_bound = np.array([[1,1,1, 1,1,1, 1,1,1, 1,1,1] for i in range(self.NUM_DRONES)]) return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32) ############################################################ else: - print("[ERROR] in BaseMultiagentAviary._observationSpace()") + print("[ERROR] in BaseRLAviary._observationSpace()") ################################################################################ @@ -291,21 +285,21 @@ def _computeObs(self): path=self.ONBOARD_IMG_PATH+"drone_"+str(i), frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ) ) - return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]) + return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]).astype('float32') elif self.OBS_TYPE == ObservationType.KIN: ############################################################ #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - # return { i : self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) } + # return np.array([ self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) ]).astype('float32') ############################################################ #### OBS SPACE OF SIZE 12 obs_12 = np.zeros((self.NUM_DRONES,12)) for i in range(self.NUM_DRONES): obs = 
self._clipAndNormalizeState(self._getDroneStateVector(i)) obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,) - return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]) + return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32') ############################################################ else: - print("[ERROR] in BaseMultiagentAviary._computeObs()") + print("[ERROR] in BaseRLAviary._computeObs()") ################################################################################ diff --git a/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py b/gym_pybullet_drones/envs/HoverAviary.py similarity index 97% rename from gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py rename to gym_pybullet_drones/envs/HoverAviary.py index 96fd10c2a..9908f3f50 100644 --- a/gym_pybullet_drones/envs/single_agent_rl/HoverAviary.py +++ b/gym_pybullet_drones/envs/HoverAviary.py @@ -1,9 +1,9 @@ import numpy as np -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary +from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary +from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType -class HoverAviary(BaseSingleAgentAviary): +class HoverAviary(BaseRLAviary): """Single agent RL problem: hover at position.""" ################################################################################ @@ -49,6 +49,7 @@ def __init__(self, """ super().__init__(drone_model=drone_model, + num_drones=1, initial_xyzs=initial_xyzs, initial_rpys=initial_rpys, physics=physics, diff --git a/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py b/gym_pybullet_drones/envs/LeaderFollowerAviary.py similarity index 93% rename from gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py rename to gym_pybullet_drones/envs/LeaderFollowerAviary.py index 4722ac2aa..67d83c0cc 100644 --- a/gym_pybullet_drones/envs/multi_agent_rl/LeaderFollowerAviary.py +++ b/gym_pybullet_drones/envs/LeaderFollowerAviary.py @@ -1,10 +1,9 @@ import numpy as np -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary +from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary +from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType -class LeaderFollowerAviary(BaseMultiagentAviary): +class LeaderFollowerAviary(BaseRLAviary): """Multi-agent RL problem: leader-follower.""" ################################################################################ @@ -79,13 +78,13 @@ def _computeReward(self): The reward value for each drone. 
""" - rewards = {} + rewards = np.zeros(self.NUM_DRONES) states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)]) rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 0.5]) - states[0, 0:3])**2 - # rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD + # rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD for i in range(1, self.NUM_DRONES): - rewards[i] = -(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2 - return rewards + rewards[i] = (-(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2) + return rewards[0] #TODO: return multiple rewards ################################################################################ @@ -100,9 +99,9 @@ def _computeTerminated(self): """ bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False - done = {i: bool_val for i in range(self.NUM_DRONES)} - done["__all__"] = bool_val # True if True in done.values() else False - return done + # done = {i: bool_val for i in range(self.NUM_DRONES)} + # done["__all__"] = bool_val # True if True in done.values() else False + return bool_val #TODO: return multiple terminatation values ################################################################################ diff --git a/gym_pybullet_drones/envs/RLAviary.py b/gym_pybullet_drones/envs/RLAviary.py deleted file mode 100644 index bbf5bdb4b..000000000 --- a/gym_pybullet_drones/envs/RLAviary.py +++ /dev/null @@ -1,2 +0,0 @@ -'''TBD -''' diff --git a/gym_pybullet_drones/envs/__init__.py b/gym_pybullet_drones/envs/__init__.py index d630927c1..c387403ee 100644 --- a/gym_pybullet_drones/envs/__init__.py +++ b/gym_pybullet_drones/envs/__init__.py @@ -1,2 +1,5 @@ +from gym_pybullet_drones.envs.BetaAviary import BetaAviary from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary +from gym_pybullet_drones.envs.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.LeaderFollowerAviary import LeaderFollowerAviary from gym_pybullet_drones.envs.VelocityAviary import VelocityAviary diff --git a/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py b/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py deleted file mode 100644 index b28f8accd..000000000 --- a/gym_pybullet_drones/envs/multi_agent_rl/FlockAviary.py +++ /dev/null @@ -1,265 +0,0 @@ -import math -import numpy as np - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary - -class FlockAviary(BaseMultiagentAviary): - """Multi-agent RL problem: flocking.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - num_drones: int=2, - neighbourhood_radius: float=np.inf, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM): - """Initialization of a multi-agent RL environment. - - Using the generic multi-agent RL superclass. 
- - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - num_drones : int, optional - The desired number of drones in the aviary. - neighbourhood_radius : float, optional - Radius used to compute the drones' adjacency matrix, in meters. - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) - - """ - super().__init__(drone_model=drone_model, - num_drones=num_drones, - neighbourhood_radius=neighbourhood_radius, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obs=obs, - act=act - ) - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value(s). - - Returns - ------- - dict[int, float] - The reward value for each drone. - - """ - rewards = {} - states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)]) - rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 1]) - states[0, 0:3])**2 - for i in range(1, self.NUM_DRONES): - rewards[i] = -1 * np.linalg.norm(states[i-1, 2] - states[i, 2])**2 - return rewards - """ - # obs here is dictionary of the form {"i":{"state": Box(20,), "neighbors": MultiBinary(NUM_DRONES)}} - # parse velocity and position - vel = np.zeros((1, self.NUM_DRONES, 3)); pos = np.zeros((1, self.NUM_DRONES, 3)) - for i in range(self.NUM_DRONES): - pos[0,i,:] = obs[ i ]["state"][0:3] - vel[0,i,:] = obs[ i ]["state"][10:13] - # compute metrics - # velocity alignment - ali = 0 - EPSILON = 1e-3 # avoid divide by zero - linear_vel_norm = np.linalg.norm(vel, axis=2) - for i in range(self.NUM_DRONES): - for j in range(self.NUM_DRONES): - if j != i: - d = np.einsum('ij,ij->i', vel[:, i, :], vel[:, j, :]) - ali += (d / (linear_vel_norm[:, i] + EPSILON) / (linear_vel_norm[:, j] + EPSILON)) - ali /= (self.NUM_DRONES * (self.NUM_DRONES - 1)) - # flocking speed - cof_v = np.mean(vel, axis=1) # center of flock speed - avg_flock_linear_speed = np.linalg.norm(cof_v, axis=-1) - # spacing - whole_flock_spacing = [] - for i in range(self.NUM_DRONES): - flck_neighbor_pos = np.delete(pos, [i], 1) - drone_neighbor_pos_diff = flck_neighbor_pos - np.reshape(pos[:, i, :], (pos[:, i, :].shape[0], 1, -1)) - drone_neighbor_dis = np.linalg.norm(drone_neighbor_pos_diff, axis=-1) - drone_spacing = np.amin(drone_neighbor_dis, axis=-1) - whole_flock_spacing.append(drone_spacing) - whole_flock_spacing = np.stack(whole_flock_spacing, axis=-1) - avg_flock_spacing = np.mean(whole_flock_spacing, axis=-1) - var_flock_spacing = np.var(whole_flock_spacing, axis=-1) - # flocking metrics - 
FLOCK_SPACING_MIN = 1.0; FLOCK_SPACING_MAX = 3.0 - if FLOCK_SPACING_MIN < avg_flock_spacing[0] < FLOCK_SPACING_MAX: - avg_flock_spac_rew = 0.0 - else: - avg_flock_spac_rew = min(math.fabs(avg_flock_spacing[0] - FLOCK_SPACING_MIN), - math.fabs(avg_flock_spacing[0] - FLOCK_SPACING_MAX)) - reward = ali[0] + avg_flock_linear_speed[0] - avg_flock_spac_rew - var_flock_spacing[0] - return { i : reward for i in range(self.NUM_DRONES) } - """ - - ################################################################################ - - def _computeTerminated(self): - """Computes the current done value(s). - - Returns - ------- - dict[int | "__all__", bool] - Dictionary with the done value of each drone and - one additional boolean value for key "__all__". - - """ - bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False - done = {i: bool_val for i in range(self.NUM_DRONES)} - done["__all__"] = True if True in done.values() else False - return done - - ################################################################################ - - def _computeTruncated(self): - """Computes the current truncated value(s). - - Unused in this implementation. - - Returns - ------- - bool - Always false. - - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused. - - Returns - ------- - dict[int, dict[]] - Dictionary of empty dictionaries. - - """ - return {i: {} for i in range(self.NUM_DRONES)} - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. 
- - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. - - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in FlockAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12])) diff --git a/gym_pybullet_drones/envs/multi_agent_rl/__init__.py b/gym_pybullet_drones/envs/multi_agent_rl/__init__.py deleted file mode 100644 index 09197c8d5..000000000 --- a/gym_pybullet_drones/envs/multi_agent_rl/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary -from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary -from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary diff --git a/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py b/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py deleted file mode 100644 index 5b4745cb3..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/BaseSingleAgentAviary.py +++ /dev/null @@ -1,305 +0,0 @@ -import os -from enum import Enum -import numpy as 
np -from gymnasium import spaces -import pybullet as p - -from gym_pybullet_drones.envs.BaseAviary import BaseAviary -from gym_pybullet_drones.utils.enums import DroneModel, Physics, ImageType, ActionType, ObservationType -from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl - - -class BaseSingleAgentAviary(BaseAviary): - """Base single drone environment class for reinforcement learning.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM - ): - """Initialization of a generic single agent RL environment. - - Attribute `num_drones` is automatically set to 1; `vision_attributes` - are selected based on the choice of `obs` and `act`; `obstacles` is - set to True and overridden with landmarks for vision applications; - `user_debug_gui` is set to False for performance. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.) - - """ - vision_attributes = True if obs == ObservationType.RGB else False - self.OBS_TYPE = obs - self.ACT_TYPE = act - self.EPISODE_LEN_SEC = 5 - #### Create integrated controllers ######################### - if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID]: - os.environ['KMP_DUPLICATE_LIB_OK']='True' - if drone_model in [DroneModel.CF2X, DroneModel.CF2P]: - self.ctrl = DSLPIDControl(drone_model=DroneModel.CF2X) - else: - print("[ERROR] in BaseSingleAgentAviary.__init()__, no controller is available for the specified drone_model") - super().__init__(drone_model=drone_model, - num_drones=1, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obstacles=True, # Add obstacles for RGB observations and/or FlyThruGate - user_debug_gui=False, # Remove of RPM sliders from all single agent learning aviaries - vision_attributes=vision_attributes, - ) - #### Set a limit on the maximum target speed ############### - if act == ActionType.VEL: - self.SPEED_LIMIT = 0.03 * self.MAX_SPEED_KMH * (1000/3600) - - ################################################################################ - - def _addObstacles(self): - """Add obstacles to the environment. - - Only if the observation is of type RGB, 4 landmarks are added. - Overrides BaseAviary's method. 
- - """ - if self.OBS_TYPE == ObservationType.RGB: - p.loadURDF("block.urdf", - [1, 0, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("cube_small.urdf", - [0, 1, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("duck_vhacd.urdf", - [-1, 0, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("teddy_vhacd.urdf", - [0, -1, .1], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - else: - pass - - ################################################################################ - - def _actionSpace(self): - """Returns the action space of the environment. - - Returns - ------- - ndarray - A Box() of size 1, 3, 4, or 6 depending on the action type. - - """ - if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]: - size = 4 - elif self.ACT_TYPE == ActionType.PID: - size = 3 - elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]: - size = 1 - else: - print("[ERROR] in BaseSingleAgentAviary._actionSpace()") - exit() - return spaces.Box(low=-1*np.ones(size), - # return spaces.Box(low=np.zeros(size), # Alternative action space, see PR #32 - high=np.ones(size), - dtype=np.float32 - ) - - ################################################################################ - - def _preprocessAction(self, - action - ): - """Pre-processes the action passed to `.step()` into motors' RPMs. - - Parameter `action` is processed differenly for each of the different - action types: `action` can be of length 1, 3, 4, or 6 and represent - RPMs, desired thrust and torques, the next target position to reach - using PID control, a desired velocity vector, new PID coefficients, etc. - - Parameters - ---------- - action : ndarray - The input action for each drone, to be translated into RPMs. - - Returns - ------- - ndarray - (4,)-shaped array of ints containing to clipped RPMs - commanded to the 4 motors of each drone. 
- - """ - if self.ACT_TYPE == ActionType.RPM: - return np.array(self.HOVER_RPM * (1+0.05*action)) - elif self.ACT_TYPE == ActionType.PID: - state = self._getDroneStateVector(0) - next_pos = self._calculateNextStep( - current_position=state[0:3], - destination=action, - step_size=1, - ) - rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP, - cur_pos=state[0:3], - cur_quat=state[3:7], - cur_vel=state[10:13], - cur_ang_vel=state[13:16], - target_pos=next_pos - ) - return rpm - elif self.ACT_TYPE == ActionType.VEL: - state = self._getDroneStateVector(0) - if np.linalg.norm(action[0:3]) != 0: - v_unit_vector = action[0:3] / np.linalg.norm(action[0:3]) - else: - v_unit_vector = np.zeros(3) - rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP, - cur_pos=state[0:3], - cur_quat=state[3:7], - cur_vel=state[10:13], - cur_ang_vel=state[13:16], - target_pos=state[0:3], # same as the current position - target_rpy=np.array([0,0,state[9]]), # keep current yaw - target_vel=self.SPEED_LIMIT * np.abs(action[3]) * v_unit_vector # target the desired velocity vector - ) - return rpm - elif self.ACT_TYPE == ActionType.ONE_D_RPM: - return np.repeat(self.HOVER_RPM * (1+0.05*action), 4) - elif self.ACT_TYPE == ActionType.ONE_D_PID: - state = self._getDroneStateVector(0) - rpm, _, _ = self.ctrl.computeControl(control_timestep=self.CTRL_TIMESTEP, - cur_pos=state[0:3], - cur_quat=state[3:7], - cur_vel=state[10:13], - cur_ang_vel=state[13:16], - target_pos=state[0:3]+0.1*np.array([0,0,action[0]]) - ) - return rpm - else: - print("[ERROR] in BaseSingleAgentAviary._preprocessAction()") - - ################################################################################ - - def _observationSpace(self): - """Returns the observation space of the environment. - - Returns - ------- - ndarray - A Box() of shape (H,W,4) or (12,) depending on the observation type. - - """ - if self.OBS_TYPE == ObservationType.RGB: - return spaces.Box(low=0, - high=255, - shape=(self.IMG_RES[1], self.IMG_RES[0], 4), - dtype=np.uint8 - ) - elif self.OBS_TYPE == ObservationType.KIN: - ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3 - # obs_lower_bound = np.array([-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]) - # obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) - # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 ) - ############################################################ - #### OBS SPACE OF SIZE 12 - return spaces.Box(low=np.array([-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1]), - high=np.array([1,1,1, 1,1,1, 1,1,1, 1,1,1]), - dtype=np.float32 - ) - ############################################################ - else: - print("[ERROR] in BaseSingleAgentAviary._observationSpace()") - - ################################################################################ - - def _computeObs(self): - """Returns the current observation of the environment. - - Returns - ------- - ndarray - A Box() of shape (H,W,4) or (12,) depending on the observation type. 
- - """ - if self.OBS_TYPE == ObservationType.RGB: - if self.step_counter%self.IMG_CAPTURE_FREQ == 0: - self.rgb[0], self.dep[0], self.seg[0] = self._getDroneImages(0, - segmentation=False - ) - #### Printing observation to PNG frames example ############ - if self.RECORD: - self._exportImage(img_type=ImageType.RGB, - img_input=self.rgb[0], - path=self.ONBOARD_IMG_PATH, - frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ) - ) - return self.rgb[0] - elif self.OBS_TYPE == ObservationType.KIN: - obs = self._clipAndNormalizeState(self._getDroneStateVector(0)) - ############################################################ - #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS) - # return obs - ############################################################ - #### OBS SPACE OF SIZE 12 - ret = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,) - return ret.astype('float32') - ############################################################ - else: - print("[ERROR] in BaseSingleAgentAviary._computeObs()") - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Must be implemented in a subclass. - - Parameters - ---------- - state : ndarray - Array containing the non-normalized state of a single drone. - - """ - raise NotImplementedError diff --git a/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py b/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py deleted file mode 100644 index f1b072b20..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/FlyThruGateAviary.py +++ /dev/null @@ -1,240 +0,0 @@ -import os -import numpy as np -import pybullet as p -import pkg_resources - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary - - -class FlyThruGateAviary(BaseSingleAgentAviary): - """Single agent RL problem: fly through a gate.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - pyb_freq: int = 240, - ctrl_freq: int = 240, - gui=False, - record=False, - obs: ObservationType=ObservationType.KIN, - act: ActionType=ActionType.RPM - ): - """Initialization of a single agent RL environment. - - Using the generic single agent RL superclass. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - pyb_freq : int, optional - The frequency at which PyBullet steps (a multiple of ctrl_freq). - ctrl_freq : int, optional - The frequency at which the environment steps. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. 
- obs : ObservationType, optional - The type of observation space (kinematic information or vision) - act : ActionType, optional - The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control) - - """ - super().__init__(drone_model=drone_model, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - pyb_freq=pyb_freq, - ctrl_freq=ctrl_freq, - gui=gui, - record=record, - obs=obs, - act=act - ) - - ################################################################################ - - def _addObstacles(self): - """Add obstacles to the environment. - - Extends the superclass method and add the gate build of cubes and an architrave. - - """ - super()._addObstacles() - p.loadURDF(pkg_resources.resource_filename('gym_pybullet_drones', 'assets/architrave.urdf'), - [0, -1, .55], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - for i in range(10): - p.loadURDF("cube_small.urdf", - [-.3, -1, .02+i*0.05], - p.getQuaternionFromEuler([0, 0, 0]), - physicsClientId=self.CLIENT - ) - p.loadURDF("cube_small.urdf", - [.3, -1, .02+i*0.05], - p.getQuaternionFromEuler([0,0,0]), - physicsClientId=self.CLIENT - ) - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value. - - Returns - ------- - float - The reward. - - """ - state = self._getDroneStateVector(0) - norm_ep_time = (self.step_counter/self.PYB_FREQ) / self.EPISODE_LEN_SEC - return -10 * np.linalg.norm(np.array([0, -2*norm_ep_time, 0.75])-state[0:3])**2 - - ################################################################################ - - def _computeTerminated(self): - """Computes the current done value. - - Returns - ------- - bool - Whether the current episode is done. - - """ - if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC: - return True - else: - return False - - ################################################################################ - - def _computeTruncated(self): - """Computes the current truncated value(s). - - Unused in this implementation. - - Returns - ------- - bool - Always false. - - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused. - - Returns - ------- - dict[str, int] - Dummy value. - - """ - return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years - - ################################################################################ - - def _clipAndNormalizeState(self, - state - ): - """Normalizes a drone's state to the [-1,1] range. - - Parameters - ---------- - state : ndarray - (20,)-shaped array of floats containing the non-normalized state of a single drone. - - Returns - ------- - ndarray - (20,)-shaped array of floats containing the normalized state of a single drone. 
- - """ - MAX_LIN_VEL_XY = 3 - MAX_LIN_VEL_Z = 1 - - MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC - MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC - - MAX_PITCH_ROLL = np.pi # Full range - - clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY) - clipped_pos_z = np.clip(state[2], 0, MAX_Z) - clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL) - clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY) - clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z) - - if self.GUI: - self._clipAndNormalizeStateWarning(state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z - ) - - normalized_pos_xy = clipped_pos_xy / MAX_XY - normalized_pos_z = clipped_pos_z / MAX_Z - normalized_rp = clipped_rp / MAX_PITCH_ROLL - normalized_y = state[9] / np.pi # No reason to clip - normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY - normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY - normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16] - - norm_and_clipped = np.hstack([normalized_pos_xy, - normalized_pos_z, - state[3:7], - normalized_rp, - normalized_y, - normalized_vel_xy, - normalized_vel_z, - normalized_ang_vel, - state[16:20] - ]).reshape(20,) - - return norm_and_clipped - - ################################################################################ - - def _clipAndNormalizeStateWarning(self, - state, - clipped_pos_xy, - clipped_pos_z, - clipped_rp, - clipped_vel_xy, - clipped_vel_z, - ): - """Debugging printouts associated to `_clipAndNormalizeState`. - - Print a warning if values in a state vector is out of the clipping range. - - """ - if not(clipped_pos_xy == np.array(state[0:2])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1])) - if not(clipped_pos_z == np.array(state[2])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2])) - if not(clipped_rp == np.array(state[7:9])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8])) - if not(clipped_vel_xy == np.array(state[10:12])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11])) - if not(clipped_vel_z == np.array(state[12])).all(): - print("[WARNING] it", self.step_counter, "in FlyThruGateAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12])) diff --git a/gym_pybullet_drones/envs/single_agent_rl/__init__.py b/gym_pybullet_drones/envs/single_agent_rl/__init__.py deleted file mode 100644 index b50f296b4..000000000 --- a/gym_pybullet_drones/envs/single_agent_rl/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import BaseSingleAgentAviary -from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary -from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary diff --git a/gym_pybullet_drones/examples/learn.py b/gym_pybullet_drones/examples/learn.py index 97c69d768..ecfef73e6 100644 --- a/gym_pybullet_drones/examples/learn.py +++ b/gym_pybullet_drones/examples/learn.py @@ -21,7 +21,8 @@ from stable_baselines3 import PPO from gym_pybullet_drones.utils.Logger import Logger -from 
gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.LeaderFollowerAviary import LeaderFollowerAviary from gym_pybullet_drones.utils.utils import sync, str2bool DEFAULT_GUI = True @@ -32,12 +33,13 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB, record_video=DEFAULT_RECORD_VIDEO): #### Check the environment's spaces ######################## - env = gym.make("hover-aviary-v0") - print("[INFO] Action space:", env.action_space) - print("[INFO] Observation space:", env.observation_space) + # env = gym.make('hover-aviary-v0') + env = gym.make('leaderfollower-aviary-v0') + print('[INFO] Action space:', env.action_space) + print('[INFO] Observation space:', env.observation_space) #### Train the model ####################################### - model = PPO("MlpPolicy", + model = PPO('MlpPolicy', env, verbose=1 ) @@ -59,9 +61,16 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D deterministic=True ) obs, reward, terminated, truncated, info = env.step(action) + obs2 = obs.squeeze() + act2 = action.squeeze() + print("Obs:", obs, "\tAction", action, "\tReward:", reward, "\tTerminated:", terminated, "\tTruncated:", truncated) logger.log(drone=0, timestamp=i/env.CTRL_FREQ, - state=np.hstack([obs[0:3], np.zeros(4), obs[3:15], np.resize(action, (4))]), + state=np.hstack([obs2[0:3], + np.zeros(4), + obs2[3:15], + act2 + ]), control=np.zeros(12) ) env.render() @@ -74,7 +83,7 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D if plot: logger.plot() -if __name__ == "__main__": +if __name__ == '__main__': #### Define and parse (optional) arguments for the script ## parser = argparse.ArgumentParser(description='Single agent reinforcement learning example script using HoverAviary') parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='') @@ -293,32 +302,32 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D # ) # #### Create eveluation environment ######################### -# if obs == ObservationType.KIN: +# if obs == ObservationType.KIN: # eval_env = gym.make(env_name, # aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, # obs=obs, # act=act # ) # elif obs == ObservationType.RGB: -# if env_name == "takeoff-aviary-v0": +# if env_name == "takeoff-aviary-v0": # eval_env = make_vec_env(TakeoffAviary, # env_kwargs=sa_env_kwargs, # n_envs=1, # seed=0 # ) -# if env_name == "hover-aviary-v0": +# if env_name == "hover-aviary-v0": # eval_env = make_vec_env(HoverAviary, # env_kwargs=sa_env_kwargs, # n_envs=1, # seed=0 # ) -# if env_name == "flythrugate-aviary-v0": +# if env_name == "flythrugate-aviary-v0": # eval_env = make_vec_env(FlyThruGateAviary, # env_kwargs=sa_env_kwargs, # n_envs=1, # seed=0 # ) -# if env_name == "tune-aviary-v0": +# if env_name == "tune-aviary-v0": # eval_env = make_vec_env(TuneAviary, # env_kwargs=sa_env_kwargs, # n_envs=1, @@ -362,8 +371,8 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D # parser.add_argument('--algo', default=DEFAULT_ALGO, type=str, choices=['a2c', 'ppo', 'sac', 'td3', 'ddpg'], help='RL agent (default: ppo)', metavar='') # parser.add_argument('--obs', default=DEFAULT_OBS, type=ObservationType, help='Observation space (default: kin)', metavar='') # parser.add_argument('--act', default=DEFAULT_ACT, type=ActionType, 
help='Action space (default: one_d_rpm)', metavar='') -# parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='') -# parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='') +# parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='') +# parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='') # parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='') # ARGS = parser.parse_args()
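
For reference, a minimal usage sketch of the environments consolidated by this patch (not part of the diff itself): it follows the post-patch registrations in gym_pybullet_drones/__init__.py and the Gymnasium-style 5-tuple step() already used in examples/learn.py; the reset() return convention and the space shapes noted in the comments are assumptions inferred from BaseRLAviary, not guaranteed by this patch.

import gymnasium as gym

import gym_pybullet_drones  # importing the package registers 'hover-aviary-v0' and 'leaderfollower-aviary-v0'
from gym_pybullet_drones.envs.LeaderFollowerAviary import LeaderFollowerAviary
from gym_pybullet_drones.utils.enums import ObservationType, ActionType

#### Single-agent case through the registry, as in examples/learn.py ########
env = gym.make('hover-aviary-v0')
print('[INFO] Action space:', env.action_space)            # expected Box(-1, 1, (1, 4)) for RPM actions
print('[INFO] Observation space:', env.observation_space)  # expected Box(..., (1, 12)) for KIN observations

obs, info = env.reset()
for _ in range(10):
    action = env.action_space.sample()  # random normalized action, rescaled to RPMs by _preprocessAction()
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()
env.close()

#### Multi-agent case, instantiating the class directly #####################
multi_env = LeaderFollowerAviary(num_drones=2,
                                 obs=ObservationType.KIN,
                                 act=ActionType.RPM
                                 )
print('[INFO] Multi-agent action space:', multi_env.action_space)  # expected Box(-1, 1, (2, 4))
multi_env.close()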