Skip to content

Commit

Permalink
Simplified RL aviaries
Browse files Browse the repository at this point in the history
  • Loading branch information
JacopoPan committed Oct 15, 2023
1 parent e2412b7 commit dd95218
Show file tree
Hide file tree
Showing 13 changed files with 59 additions and 880 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,11 @@ If you wish, please cite our [IROS 2021 paper](https://arxiv.org/abs/2103.02142)
## TODO

- [ ] Add `crazyflie-firmware` SITL support @spencerteetaert

## DESIDERATA

- [ ] Add motor delay @JacopoPan / @spencerteetaert
- [ ] Replace `rpy` with quaternions (and `ang_vel` with body rates) in `obs` @JacopoPan
- [ ] Replace `BaseSingleAgentAviary` and `BaseMultiAgentAviary` with a single `RLAviary`, incl. PR #161 @JacopoPan
- [ ] Add a multi-agent MDP with 2-drone chase through a gate @JacopoPan

-----
> University of Toronto's [Dynamic Systems Lab](https://github.com/utiasDSL) / [Vector Institute](https://github.com/VectorInstitute) / University of Cambridge's [Prorok Lab](https://github.com/proroklab)
14 changes: 2 additions & 12 deletions gym_pybullet_drones/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,10 @@

register(
id='hover-aviary-v0',
entry_point='gym_pybullet_drones.envs.single_agent_rl:HoverAviary',
)

register(
id='flythrugate-aviary-v0',
entry_point='gym_pybullet_drones.envs.single_agent_rl:FlyThruGateAviary',
)

register(
id='flock-aviary-v0',
entry_point='gym_pybullet_drones.envs.multi_agent_rl:FlockAviary',
entry_point='gym_pybullet_drones.envs:HoverAviary',
)

register(
id='leaderfollower-aviary-v0',
entry_point='gym_pybullet_drones.envs.multi_agent_rl:LeaderFollowerAviary',
entry_point='gym_pybullet_drones.envs:LeaderFollowerAviary',
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType, ImageType
from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl

class BaseMultiagentAviary(BaseAviary):
"""Base multi-agent environment class for reinforcement learning."""
class BaseRLAviary(BaseAviary):
"""Base single and multi-agent environment class for reinforcement learning."""

################################################################################

def __init__(self,
drone_model: DroneModel=DroneModel.CF2X,
num_drones: int=2,
num_drones: int=1,
neighbourhood_radius: float=np.inf,
initial_xyzs=None,
initial_rpys=None,
Expand All @@ -26,7 +26,7 @@ def __init__(self,
obs: ObservationType=ObservationType.KIN,
act: ActionType=ActionType.RPM
):
"""Initialization of a generic multi-agent RL environment.
"""Initialization of a generic single and multi-agent RL environment.
Attributes `vision_attributes` and `dynamics_attributes` are selected
based on the choice of `obs` and `act`; `obstacles` is set to True
Expand Down Expand Up @@ -61,9 +61,6 @@ def __init__(self,
The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.)
"""
if num_drones < 2:
print("[ERROR] in BaseMultiagentAviary.__init__(), num_drones should be >= 2")
exit()
vision_attributes = True if obs == ObservationType.RGB else False
self.OBS_TYPE = obs
self.ACT_TYPE = act
Expand All @@ -74,7 +71,7 @@ def __init__(self,
if drone_model in [DroneModel.CF2X, DroneModel.CF2P]:
self.ctrl = [DSLPIDControl(drone_model=DroneModel.CF2X) for i in range(num_drones)]
else:
print("[ERROR] in BaseMultiagentAviary.__init()__, no controller is available for the specified drone_model")
print("[ERROR] in BaseRLAviary.__init()__, no controller is available for the specified drone_model")
super().__init__(drone_model=drone_model,
num_drones=num_drones,
neighbourhood_radius=neighbourhood_radius,
Expand Down Expand Up @@ -144,7 +141,7 @@ def _actionSpace(self):
elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]:
size = 1
else:
print("[ERROR] in BaseMultiagentAviary._actionSpace()")
print("[ERROR] in BaseRLAviary._actionSpace()")
exit()
act_lower_bound = np.array([-1*np.ones(size) for i in range(self.NUM_DRONES)])
act_upper_bound = np.array([+1*np.ones(size) for i in range(self.NUM_DRONES)])
Expand Down Expand Up @@ -228,7 +225,7 @@ def _preprocessAction(self,
)
rpm[k,:] = rpm
else:
print("[ERROR] in BaseMultiagentAviary._preprocessAction()")
print("[ERROR] in BaseRLAviary._preprocessAction()")
exit()
return rpm

Expand All @@ -244,10 +241,6 @@ def _observationSpace(self):
"""
if self.OBS_TYPE == ObservationType.RGB:
# return spaces.Dict({i: spaces.Box(low=0,
# high=255,
# shape=(self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8
# ) for i in range(self.NUM_DRONES)})
return spaces.Box(low=0,
high=255,
shape=(self.NUM_DRONES, self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8)
Expand All @@ -259,13 +252,14 @@ def _observationSpace(self):
# obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
# return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 )
############################################################
#### OBS SPACE OF SIZE 12
#### OBS SPACE OF SIZE 12, W/O RPMS
#### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ
obs_lower_bound = np.array([[-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1] for i in range(self.NUM_DRONES)])
obs_upper_bound = np.array([[1,1,1, 1,1,1, 1,1,1, 1,1,1] for i in range(self.NUM_DRONES)])
return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32)
############################################################
else:
print("[ERROR] in BaseMultiagentAviary._observationSpace()")
print("[ERROR] in BaseRLAviary._observationSpace()")

################################################################################

Expand All @@ -291,21 +285,21 @@ def _computeObs(self):
path=self.ONBOARD_IMG_PATH+"drone_"+str(i),
frame_num=int(self.step_counter/self.IMG_CAPTURE_FREQ)
)
return np.array([self.rgb[i] for i in range(self.NUM_DRONES)])
return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]).astype('float32')
elif self.OBS_TYPE == ObservationType.KIN:
############################################################
#### OBS OF SIZE 20 (WITH QUATERNION AND RPMS)
# return { i : self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) }
# return np.array([ self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) ]).astype('float32')
############################################################
#### OBS SPACE OF SIZE 12
obs_12 = np.zeros((self.NUM_DRONES,12))
for i in range(self.NUM_DRONES):
obs = self._clipAndNormalizeState(self._getDroneStateVector(i))
obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,)
return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)])
return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32')
############################################################
else:
print("[ERROR] in BaseMultiagentAviary._computeObs()")
print("[ERROR] in BaseRLAviary._computeObs()")

################################################################################

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import numpy as np

from gym_pybullet_drones.utils.enums import DroneModel, Physics
from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType, BaseSingleAgentAviary
from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary
from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType

class HoverAviary(BaseSingleAgentAviary):
class HoverAviary(BaseRLAviary):
"""Single agent RL problem: hover at position."""

################################################################################
Expand Down Expand Up @@ -49,6 +49,7 @@ def __init__(self,
"""
super().__init__(drone_model=drone_model,
num_drones=1,
initial_xyzs=initial_xyzs,
initial_rpys=initial_rpys,
physics=physics,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import numpy as np

from gym_pybullet_drones.utils.enums import DroneModel, Physics
from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType
from gym_pybullet_drones.envs.multi_agent_rl.BaseMultiagentAviary import BaseMultiagentAviary
from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary
from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType

class LeaderFollowerAviary(BaseMultiagentAviary):
class LeaderFollowerAviary(BaseRLAviary):
"""Multi-agent RL problem: leader-follower."""

################################################################################
Expand Down Expand Up @@ -79,13 +78,13 @@ def _computeReward(self):
The reward value for each drone.
"""
rewards = {}
rewards = np.zeros(self.NUM_DRONES)
states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 0.5]) - states[0, 0:3])**2
# rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD
# rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD
for i in range(1, self.NUM_DRONES):
rewards[i] = -(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2
return rewards
rewards[i] = (-(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2)
return rewards[0] #TODO: return multiple rewards

################################################################################

Expand All @@ -100,9 +99,9 @@ def _computeTerminated(self):
"""
bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False
done = {i: bool_val for i in range(self.NUM_DRONES)}
done["__all__"] = bool_val # True if True in done.values() else False
return done
# done = {i: bool_val for i in range(self.NUM_DRONES)}
# done["__all__"] = bool_val # True if True in done.values() else False
return bool_val #TODO: return multiple terminatation values

################################################################################

Expand Down
2 changes: 0 additions & 2 deletions gym_pybullet_drones/envs/RLAviary.py

This file was deleted.

3 changes: 3 additions & 0 deletions gym_pybullet_drones/envs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
from gym_pybullet_drones.envs.BetaAviary import BetaAviary
from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary
from gym_pybullet_drones.envs.HoverAviary import HoverAviary
from gym_pybullet_drones.envs.LeaderFollowerAviary import LeaderFollowerAviary
from gym_pybullet_drones.envs.VelocityAviary import VelocityAviary
Loading

0 comments on commit dd95218

Please sign in to comment.