Simplified RL aviaries

utiasDSL · Oct 15, 2023 · dd95218 · dd95218
1 parent e2412b7
commit dd95218
Show file tree

Hide file tree

Showing 13 changed files with 59 additions and 880 deletions.
diff --git a/README.md b/README.md
@@ -99,10 +99,11 @@ If you wish, please cite our [IROS 2021 paper](https://arxiv.org/abs/2103.02142)
 ## TODO
 
 - [ ] Add `crazyflie-firmware` SITL support @spencerteetaert
+
+## DESIDERATA
+
 - [ ] Add motor delay @JacopoPan / @spencerteetaert
 - [ ] Replace `rpy` with quaternions (and `ang_vel` with body rates) in `obs` @JacopoPan
-- [ ] Replace `BaseSingleAgentAviary` and `BaseMultiAgentAviary` with a single `RLAviary`, incl. PR #161 @JacopoPan
-- [ ] Add a multi-agent MDP with 2-drone chase through a gate @JacopoPan
 
 -----
 > University of Toronto's [Dynamic Systems Lab](https://github.com/utiasDSL) / [Vector Institute](https://github.com/VectorInstitute) / University of Cambridge's [Prorok Lab](https://github.com/proroklab)
diff --git a/gym_pybullet_drones/__init__.py b/gym_pybullet_drones/__init__.py
@@ -12,20 +12,10 @@
 
 register(
     id='hover-aviary-v0',
-    entry_point='gym_pybullet_drones.envs.single_agent_rl:HoverAviary',
-)
-
-register(
-    id='flythrugate-aviary-v0',
-    entry_point='gym_pybullet_drones.envs.single_agent_rl:FlyThruGateAviary',
-)
-
-register(
-    id='flock-aviary-v0',
-    entry_point='gym_pybullet_drones.envs.multi_agent_rl:FlockAviary',
+    entry_point='gym_pybullet_drones.envs:HoverAviary',
 )
 
 register(
     id='leaderfollower-aviary-v0',
-    entry_point='gym_pybullet_drones.envs.multi_agent_rl:LeaderFollowerAviary',
+    entry_point='gym_pybullet_drones.envs:LeaderFollowerAviary',
 )
diff --git a/...vs/multi_agent_rl/BaseMultiagentAviary.py → gym_pybullet_drones/envs/BaseRLAviary.py b/...vs/multi_agent_rl/BaseMultiagentAviary.py → gym_pybullet_drones/envs/BaseRLAviary.py
@@ -7,14 +7,14 @@
 from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType, ImageType
 from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl
 
-class BaseMultiagentAviary(BaseAviary):
-    """Base multi-agent environment class for reinforcement learning."""
+class BaseRLAviary(BaseAviary):
+    """Base single and multi-agent environment class for reinforcement learning."""
 
     ################################################################################
 
     def __init__(self,
                  drone_model: DroneModel=DroneModel.CF2X,
-                 num_drones: int=2,
+                 num_drones: int=1,
                  neighbourhood_radius: float=np.inf,
                  initial_xyzs=None,
                  initial_rpys=None,
@@ -26,7 +26,7 @@ def __init__(self,
                  obs: ObservationType=ObservationType.KIN,
                  act: ActionType=ActionType.RPM
                  ):
-        """Initialization of a generic multi-agent RL environment.
+        """Initialization of a generic single and multi-agent RL environment.
 
         Attributes `vision_attributes` and `dynamics_attributes` are selected
         based on the choice of `obs` and `act`; `obstacles` is set to True 
@@ -61,9 +61,6 @@ def __init__(self,
             The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.)
 
         """
-        if num_drones < 2:
-            print("[ERROR] in BaseMultiagentAviary.__init__(), num_drones should be >= 2")
-            exit()
         vision_attributes = True if obs == ObservationType.RGB else False
         self.OBS_TYPE = obs
         self.ACT_TYPE = act
@@ -74,7 +71,7 @@ def __init__(self,
             if drone_model in [DroneModel.CF2X, DroneModel.CF2P]:
                 self.ctrl = [DSLPIDControl(drone_model=DroneModel.CF2X) for i in range(num_drones)]
             else:
-                print("[ERROR] in BaseMultiagentAviary.__init()__, no controller is available for the specified drone_model")
+                print("[ERROR] in BaseRLAviary.__init()__, no controller is available for the specified drone_model")
         super().__init__(drone_model=drone_model,
                          num_drones=num_drones,
                          neighbourhood_radius=neighbourhood_radius,
@@ -144,7 +141,7 @@ def _actionSpace(self):
         elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]:
             size = 1
         else:
-            print("[ERROR] in BaseMultiagentAviary._actionSpace()")
+            print("[ERROR] in BaseRLAviary._actionSpace()")
             exit()
         act_lower_bound = np.array([-1*np.ones(size) for i in range(self.NUM_DRONES)])
         act_upper_bound = np.array([+1*np.ones(size) for i in range(self.NUM_DRONES)])
@@ -228,7 +225,7 @@ def _preprocessAction(self,
                                                         )
                 rpm[k,:] = rpm
             else:
-                print("[ERROR] in BaseMultiagentAviary._preprocessAction()")
+                print("[ERROR] in BaseRLAviary._preprocessAction()")
                 exit()
         return rpm
 
@@ -244,10 +241,6 @@ def _observationSpace(self):
 
         """
         if self.OBS_TYPE == ObservationType.RGB:
-            # return spaces.Dict({i: spaces.Box(low=0,
-            #                                   high=255,
-            #                                   shape=(self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8
-            #                                   ) for i in range(self.NUM_DRONES)})
             return spaces.Box(low=0,
                               high=255,
                               shape=(self.NUM_DRONES, self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8)
@@ -259,13 +252,14 @@ def _observationSpace(self):
             # obs_upper_bound = np.array([1,       1,       1,      1,   1,   1,   1,   1,      1,      1,      1,       1,       1,       1,       1,       1,       1,            1,            1,            1])          
             # return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 )
             ############################################################
-            #### OBS SPACE OF SIZE 12
+            #### OBS SPACE OF SIZE 12, W/O RPMS
+            #### Observation vector ### X        Y        Z       Q1   Q2   Q3   Q4   R       P       Y       VX       VY       VZ       WX       WY       WZ
             obs_lower_bound = np.array([[-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1] for i in range(self.NUM_DRONES)])
             obs_upper_bound = np.array([[1,1,1, 1,1,1, 1,1,1, 1,1,1] for i in range(self.NUM_DRONES)])
             return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32)
             ############################################################
         else:
-            print("[ERROR] in BaseMultiagentAviary._observationSpace()")
+            print("[ERROR] in BaseRLAviary._observationSpace()")
 
     ################################################################################
 
@@ -291,21 +285,21 @@ def _computeObs(self):
                                           path=self.ONBOARD_IMG_PATH+"drone_"+str(i),
                                           frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ)
                                           )
-            return np.array([self.rgb[i] for i in range(self.NUM_DRONES)])
+            return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]).astype('float32')
         elif self.OBS_TYPE == ObservationType.KIN:
             ############################################################
             #### OBS OF SIZE 20 (WITH QUATERNION AND RPMS)
-            # return {   i   : self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) }
+            # return np.array([ self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) ]).astype('float32')
             ############################################################
             #### OBS SPACE OF SIZE 12
             obs_12 = np.zeros((self.NUM_DRONES,12))
             for i in range(self.NUM_DRONES):
                 obs = self._clipAndNormalizeState(self._getDroneStateVector(i))
                 obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,)
-            return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)])
+            return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32')
             ############################################################
         else:
-            print("[ERROR] in BaseMultiagentAviary._computeObs()")
+            print("[ERROR] in BaseRLAviary._computeObs()")
 
     ################################################################################
 

diff --git a/...rones/envs/single_agent_rl/HoverAviary.py → gym_pybullet_drones/envs/HoverAviary.py b/...rones/envs/single_agent_rl/HoverAviary.py → gym_pybullet_drones/envs/HoverAviary.py
@@ -1,9 +1,9 @@
 import numpy as np
 
-from gym_pybullet_drones.utils.enums import DroneModel, Physics
-from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary
+from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary
+from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType
 
-class HoverAviary(BaseSingleAgentAviary):
+class HoverAviary(BaseRLAviary):
     """Single agent RL problem: hover at position."""
 
     ################################################################################
@@ -49,6 +49,7 @@ def __init__(self,
 
         """
         super().__init__(drone_model=drone_model,
+                         num_drones=1,
                          initial_xyzs=initial_xyzs,
                          initial_rpys=initial_rpys,
                          physics=physics,

diff --git a/...vs/multi_agent_rl/LeaderFollowerAviary.py → ...ullet_drones/envs/LeaderFollowerAviary.py b/...vs/multi_agent_rl/LeaderFollowerAviary.py → ...ullet_drones/envs/LeaderFollowerAviary.py
@@ -1,10 +1,9 @@
 import numpy as np
 
-from gym_pybullet_drones.utils.enums import DroneModel, Physics
-from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType
-from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary
+from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary
+from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType
 
-class LeaderFollowerAviary(BaseMultiagentAviary):
+class LeaderFollowerAviary(BaseRLAviary):
     """Multi-agent RL problem: leader-follower."""
 
     ################################################################################
@@ -79,13 +78,13 @@ def _computeReward(self):
             The reward value for each drone.
 
         """
-        rewards = {}
+        rewards = np.zeros(self.NUM_DRONES)
         states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
         rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 0.5]) - states[0, 0:3])**2
-        # rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD 
+        # rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD
         for i in range(1, self.NUM_DRONES):
-            rewards[i] = -(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2
-        return rewards
+            rewards[i] = (-(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2)
+        return rewards[0] #TODO: return multiple rewards
 
     ################################################################################
 
@@ -100,9 +99,9 @@ def _computeTerminated(self):
 
         """
         bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False
-        done = {i: bool_val for i in range(self.NUM_DRONES)}
-        done["__all__"] = bool_val # True if True in done.values() else False
-        return done
+        # done = {i: bool_val for i in range(self.NUM_DRONES)}
+        # done["__all__"] = bool_val # True if True in done.values() else False
+        return bool_val #TODO: return multiple terminatation values
 
     ################################################################################
 

diff --git a/gym_pybullet_drones/envs/RLAviary.py b/gym_pybullet_drones/envs/RLAviary.py
diff --git a/gym_pybullet_drones/envs/__init__.py b/gym_pybullet_drones/envs/__init__.py
@@ -1,2 +1,5 @@
+from gym_pybullet_drones.envs.BetaAviary import BetaAviary
 from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary
+from gym_pybullet_drones.envs.HoverAviary import HoverAviary
+from gym_pybullet_drones.envs.LeaderFollowerAviary import LeaderFollowerAviary
 from gym_pybullet_drones.envs.VelocityAviary import VelocityAviary