From ca1feec8699e0358f7c2cc81545eab27b3b899ed Mon Sep 17 00:00:00 2001 From: Jacopo Panerati Date: Sat, 15 Apr 2023 19:52:37 +0400 Subject: [PATCH] further cleanup --- experiments/learning/ma_script.bash | 0 experiments/learning/ma_script.slrm | 0 experiments/learning/multiagent.py | 309 ---------------- experiments/learning/results/foo.txt | 0 experiments/learning/test_multiagent.py | 332 ------------------ experiments/{learning => }/sa_script.bash | 0 experiments/{learning => }/sa_script.slrm | 0 .../{learning => }/shared_constants.py | 0 experiments/{learning => }/singleagent.py | 0 .../{learning => }/test_singleagent.py | 0 gym_pybullet_drones/envs/DynAviary.py | 219 ------------ gym_pybullet_drones/envs/RLAviary.py | 2 + gym_pybullet_drones/examples/compare.py | 140 -------- gym_pybullet_drones/examples/groundeffect.py | 167 --------- .../examples/{ => learning}/learn.py | 0 .../examples/learning/sa_script.bash | 23 ++ .../examples/learning/sa_script.slrm | 53 +++ .../examples/learning/shared_constants.py | 2 + .../examples/learning/singleagent.py | 283 +++++++++++++++ .../examples/learning/test_singleagent.py | 141 ++++++++ .../examples/{velocity.py => pid-velocity.py} | 0 .../examples/{fly.py => pid.py} | 0 pyproject.toml | 6 +- 23 files changed, 507 insertions(+), 1170 deletions(-) delete mode 100644 experiments/learning/ma_script.bash delete mode 100644 experiments/learning/ma_script.slrm delete mode 100644 experiments/learning/multiagent.py delete mode 100644 experiments/learning/results/foo.txt delete mode 100644 experiments/learning/test_multiagent.py rename experiments/{learning => }/sa_script.bash (100%) rename experiments/{learning => }/sa_script.slrm (100%) rename experiments/{learning => }/shared_constants.py (100%) rename experiments/{learning => }/singleagent.py (100%) rename experiments/{learning => }/test_singleagent.py (100%) delete mode 100644 gym_pybullet_drones/envs/DynAviary.py create mode 100644 gym_pybullet_drones/envs/RLAviary.py delete mode 100644 gym_pybullet_drones/examples/compare.py delete mode 100644 gym_pybullet_drones/examples/groundeffect.py rename gym_pybullet_drones/examples/{ => learning}/learn.py (100%) create mode 100755 gym_pybullet_drones/examples/learning/sa_script.bash create mode 100644 gym_pybullet_drones/examples/learning/sa_script.slrm create mode 100644 gym_pybullet_drones/examples/learning/shared_constants.py create mode 100644 gym_pybullet_drones/examples/learning/singleagent.py create mode 100644 gym_pybullet_drones/examples/learning/test_singleagent.py rename gym_pybullet_drones/examples/{velocity.py => pid-velocity.py} (100%) rename gym_pybullet_drones/examples/{fly.py => pid.py} (100%) diff --git a/experiments/learning/ma_script.bash b/experiments/learning/ma_script.bash deleted file mode 100644 index e69de29bb..000000000 diff --git a/experiments/learning/ma_script.slrm b/experiments/learning/ma_script.slrm deleted file mode 100644 index e69de29bb..000000000 diff --git a/experiments/learning/multiagent.py b/experiments/learning/multiagent.py deleted file mode 100644 index 2ae74f620..000000000 --- a/experiments/learning/multiagent.py +++ /dev/null @@ -1,309 +0,0 @@ -"""Learning script for multi-agent problems. - -Agents are based on `ray[rllib]`'s implementation of PPO and use a custom centralized critic. 
- -Example -------- -To run the script, type in a terminal: - - $ python multiagent.py --num_drones --env --obs --act --algo --num_workers - -Notes ------ -Check Ray's status at: - - http://127.0.0.1:8265 - -""" -import os -import time -import argparse -from datetime import datetime -from sys import platform -import subprocess -import pdb -import math -import numpy as np -import pybullet as p -import pickle -import matplotlib.pyplot as plt -import gym -from gym import error, spaces, utils -from gym.utils import seeding -from gym.spaces import Box, Dict -import torch -import torch.nn as nn -from ray.rllib.models.torch.fcnet import FullyConnectedNetwork -import ray -from ray import tune -from ray.tune.logger import DEFAULT_LOGGERS -from ray.tune import register_env -from ray.rllib.agents import ppo -from ray.rllib.agents.ppo import PPOTrainer, PPOTFPolicy -from ray.rllib.examples.policy.random_policy import RandomPolicy -from ray.rllib.utils.test_utils import check_learning_achieved -from ray.rllib.agents.callbacks import DefaultCallbacks -from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 -from ray.rllib.models import ModelCatalog -from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.env.multi_agent_env import ENV_STATE - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary -from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary -from gym_pybullet_drones.envs.multi_agent_rl.MeetupAviary import MeetupAviary -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.utils.Logger import Logger - -import shared_constants - -OWN_OBS_VEC_SIZE = None # Modified at runtime -ACTION_VEC_SIZE = None # Modified at runtime - -#### Useful links ########################################## -# Workflow: github.com/ray-project/ray/blob/master/doc/source/rllib-training.rst -# ENV_STATE example: github.com/ray-project/ray/blob/master/rllib/examples/env/two_step_game.py -# Competing policies example: github.com/ray-project/ray/blob/master/rllib/examples/rock_paper_scissors_multiagent.py - -############################################################ -class CustomTorchCentralizedCriticModel(TorchModelV2, nn.Module): - """Multi-agent model that implements a centralized value function. - - It assumes the observation is a dict with 'own_obs' and 'opponent_obs', the - former of which can be used for computing actions (i.e., decentralized - execution), and the latter for optimization (i.e., centralized learning). - - This model has two parts: - - An action model that looks at just 'own_obs' to compute actions - - A value model that also looks at the 'opponent_obs' / 'opponent_action' - to compute the value (it does this by using the 'obs_flat' tensor). 
- """ - - def __init__(self, obs_space, action_space, num_outputs, model_config, name): - TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name) - nn.Module.__init__(self) - self.action_model = FullyConnectedNetwork( - Box(low=-1, high=1, shape=(OWN_OBS_VEC_SIZE, )), - action_space, - num_outputs, - model_config, - name + "_action" - ) - self.value_model = FullyConnectedNetwork( - obs_space, - action_space, - 1, - model_config, - name + "_vf" - ) - self._model_in = None - - def forward(self, input_dict, state, seq_lens): - self._model_in = [input_dict["obs_flat"], state, seq_lens] - return self.action_model({"obs": input_dict["obs"]["own_obs"]}, state, seq_lens) - - def value_function(self): - value_out, _ = self.value_model({"obs": self._model_in[0]}, self._model_in[1], self._model_in[2]) - return torch.reshape(value_out, [-1]) - -############################################################ -class FillInActions(DefaultCallbacks): - def on_postprocess_trajectory(self, worker, episode, agent_id, policy_id, policies, postprocessed_batch, original_batches, **kwargs): - to_update = postprocessed_batch[SampleBatch.CUR_OBS] - other_id = 1 if agent_id == 0 else 0 - action_encoder = ModelCatalog.get_preprocessor_for_space( - # Box(-np.inf, np.inf, (ACTION_VEC_SIZE,), np.float32) # Unbounded - Box(-1, 1, (ACTION_VEC_SIZE,), np.float32) # Bounded - ) - _, opponent_batch = original_batches[other_id] - # opponent_actions = np.array([action_encoder.transform(a) for a in opponent_batch[SampleBatch.ACTIONS]]) # Unbounded - opponent_actions = np.array([action_encoder.transform(np.clip(a, -1, 1)) for a in opponent_batch[SampleBatch.ACTIONS]]) # Bounded - to_update[:, -ACTION_VEC_SIZE:] = opponent_actions - -############################################################ -def central_critic_observer(agent_obs, **kw): - new_obs = { - 0: { - "own_obs": agent_obs[0], - "opponent_obs": agent_obs[1], - "opponent_action": np.zeros(ACTION_VEC_SIZE), # Filled in by FillInActions - }, - 1: { - "own_obs": agent_obs[1], - "opponent_obs": agent_obs[0], - "opponent_action": np.zeros(ACTION_VEC_SIZE), # Filled in by FillInActions - }, - } - return new_obs - -############################################################ -if __name__ == "__main__": - - #### Define and parse (optional) arguments for the script ## - parser = argparse.ArgumentParser(description='Multi-agent reinforcement learning experiments script') - parser.add_argument('--num_drones', default=2, type=int, help='Number of drones (default: 2)', metavar='') - parser.add_argument('--env', default='leaderfollower', type=str, choices=['leaderfollower', 'flock', 'meetup'], help='Task (default: leaderfollower)', metavar='') - parser.add_argument('--obs', default='kin', type=ObservationType, help='Observation space (default: kin)', metavar='') - parser.add_argument('--act', default='one_d_rpm', type=ActionType, help='Action space (default: one_d_rpm)', metavar='') - parser.add_argument('--algo', default='cc', type=str, choices=['cc'], help='MARL approach (default: cc)', metavar='') - parser.add_argument('--workers', default=0, type=int, help='Number of RLlib workers (default: 0)', metavar='') - ARGS = parser.parse_args() - - #### Save directory ######################################## - filename = os.path.dirname(os.path.abspath(__file__))+'/results/save-'+ARGS.env+'-'+str(ARGS.num_drones)+'-'+ARGS.algo+'-'+ARGS.obs.value+'-'+ARGS.act.value+'-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S") - if not os.path.exists(filename): - 
os.makedirs(filename+'/') - - #### Print out current git commit hash ##################### - if platform == "linux" or platform == "darwin": - git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip() - with open(filename+'/git_commit.txt', 'w+') as f: - f.write(str(git_commit)) - - #### Constants, and errors ################################# - if ARGS.obs==ObservationType.KIN: - OWN_OBS_VEC_SIZE = 12 - elif ARGS.obs==ObservationType.RGB: - print("[ERROR] ObservationType.RGB for multi-agent systems not yet implemented") - exit() - else: - print("[ERROR] unknown ObservationType") - exit() - if ARGS.act in [ActionType.ONE_D_RPM, ActionType.ONE_D_DYN, ActionType.ONE_D_PID]: - ACTION_VEC_SIZE = 1 - elif ARGS.act in [ActionType.RPM, ActionType.DYN, ActionType.VEL]: - ACTION_VEC_SIZE = 4 - elif ARGS.act == ActionType.PID: - ACTION_VEC_SIZE = 3 - else: - print("[ERROR] unknown ActionType") - exit() - - #### Uncomment to debug slurm scripts ###################### - # exit() - - #### Initialize Ray Tune ################################### - ray.shutdown() - ray.init(ignore_reinit_error=True) - - #### Register the custom centralized critic model ########## - ModelCatalog.register_custom_model("cc_model", CustomTorchCentralizedCriticModel) - - #### Register the environment ############################## - temp_env_name = "this-aviary-v0" - if ARGS.env == 'flock': - register_env(temp_env_name, lambda _: FlockAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - ) - elif ARGS.env == 'leaderfollower': - register_env(temp_env_name, lambda _: LeaderFollowerAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - ) - elif ARGS.env == 'meetup': - register_env(temp_env_name, lambda _: MeetupAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - ) - else: - print("[ERROR] environment not yet implemented") - exit() - - #### Unused env to extract the act and obs spaces ########## - if ARGS.env == 'flock': - temp_env = FlockAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - elif ARGS.env == 'leaderfollower': - temp_env = LeaderFollowerAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - elif ARGS.env == 'meetup': - temp_env = MeetupAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - else: - print("[ERROR] environment not yet implemented") - exit() - observer_space = Dict({ - "own_obs": temp_env.observation_space[0], - "opponent_obs": temp_env.observation_space[0], - "opponent_action": temp_env.action_space[0], - }) - action_space = temp_env.action_space[0] - - #### Note ################################################## - # RLlib will create ``num_workers + 1`` copies of the - # environment since one copy is needed for the driver process. 
- # To avoid paying the extra overhead of the driver copy, - # which is needed to access the env's action and observation spaces, - # you can defer environment initialization until ``reset()`` is called - - #### Set up the trainer's config ########################### - config = ppo.DEFAULT_CONFIG.copy() # For the default config, see github.com/ray-project/ray/blob/master/rllib/agents/trainer.py - config = { - "env": temp_env_name, - "num_workers": 0 + ARGS.workers, - "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0 - "batch_mode": "complete_episodes", - "callbacks": FillInActions, - "framework": "torch", - } - - #### Set up the model parameters of the trainer's config ### - config["model"] = { - "custom_model": "cc_model", - } - - #### Set up the multiagent params of the trainer's config ## - config["multiagent"] = { - "policies": { - "pol0": (None, observer_space, action_space, {"agent_id": 0,}), - "pol1": (None, observer_space, action_space, {"agent_id": 1,}), - }, - "policy_mapping_fn": lambda x: "pol0" if x == 0 else "pol1", # # Function mapping agent ids to policy ids - "observation_fn": central_critic_observer, # See rllib/evaluation/observation_function.py for more info - } - - #### Ray Tune stopping conditions ########################## - stop = { - "timesteps_total": 120000, # 100000 ~= 10' - # "episode_reward_mean": 0, - # "training_iteration": 0, - } - - #### Train ################################################# - results = tune.run( - "PPO", - stop=stop, - config=config, - verbose=True, - checkpoint_at_end=True, - local_dir=filename, - ) - # check_learning_achieved(results, 1.0) - - #### Save agent ############################################ - checkpoints = results.get_trial_checkpoints_paths(trial=results.get_best_trial('episode_reward_mean', - mode='max' - ), - metric='episode_reward_mean' - ) - with open(filename+'/checkpoint.txt', 'w+') as f: - f.write(checkpoints[0][0]) - - #### Shut down Ray ######################################### - ray.shutdown() diff --git a/experiments/learning/results/foo.txt b/experiments/learning/results/foo.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/experiments/learning/test_multiagent.py b/experiments/learning/test_multiagent.py deleted file mode 100644 index 3176c8a5f..000000000 --- a/experiments/learning/test_multiagent.py +++ /dev/null @@ -1,332 +0,0 @@ -"""Test script for multiagent problems. 
- -This scripts runs the best model found by one of the executions of `multiagent.py` - -Example -------- -To run the script, type in a terminal: - - $ python test_multiagent.py --exp ./results/save------ - -""" -import os -import time -import argparse -from datetime import datetime -import pdb -import math -import numpy as np -import pybullet as p -import pickle -import matplotlib.pyplot as plt -import gym -from gym import error, spaces, utils -from gym.utils import seeding -from gym.spaces import Box, Dict -import torch -import torch.nn as nn -import ray -from ray import tune -from ray.tune import register_env -from ray.rllib.agents import ppo -from ray.rllib.agents.ppo import PPOTrainer, PPOTFPolicy -from ray.rllib.examples.policy.random_policy import RandomPolicy -from ray.rllib.utils.test_utils import check_learning_achieved -from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 -from ray.rllib.agents.callbacks import DefaultCallbacks -from ray.rllib.models.torch.fcnet import FullyConnectedNetwork -from ray.rllib.models import ModelCatalog -from ray.rllib.policy.sample_batch import SampleBatch - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary -from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary -from gym_pybullet_drones.envs.multi_agent_rl.MeetupAviary import MeetupAviary -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.utils.Logger import Logger -from gym_pybullet_drones.utils.utils import sync - -import shared_constants - -OWN_OBS_VEC_SIZE = None # Modified at runtime -ACTION_VEC_SIZE = None # Modified at runtime - -############################################################ -class CustomTorchCentralizedCriticModel(TorchModelV2, nn.Module): - """Multi-agent model that implements a centralized value function. - - It assumes the observation is a dict with 'own_obs' and 'opponent_obs', the - former of which can be used for computing actions (i.e., decentralized - execution), and the latter for optimization (i.e., centralized learning). - - This model has two parts: - - An action model that looks at just 'own_obs' to compute actions - - A value model that also looks at the 'opponent_obs' / 'opponent_action' - to compute the value (it does this by using the 'obs_flat' tensor). 
- """ - - def __init__(self, obs_space, action_space, num_outputs, model_config, name): - TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name) - nn.Module.__init__(self) - self.action_model = FullyConnectedNetwork( - Box(low=-1, high=1, shape=(OWN_OBS_VEC_SIZE, )), - action_space, - num_outputs, - model_config, - name + "_action" - ) - self.value_model = FullyConnectedNetwork( - obs_space, - action_space, - 1, - model_config, - name + "_vf" - ) - self._model_in = None - - def forward(self, input_dict, state, seq_lens): - self._model_in = [input_dict["obs_flat"], state, seq_lens] - return self.action_model({"obs": input_dict["obs"]["own_obs"]}, state, seq_lens) - - def value_function(self): - value_out, _ = self.value_model({"obs": self._model_in[0]}, self._model_in[1], self._model_in[2]) - return torch.reshape(value_out, [-1]) - -############################################################ -class FillInActions(DefaultCallbacks): - def on_postprocess_trajectory(self, worker, episode, agent_id, policy_id, policies, postprocessed_batch, original_batches, **kwargs): - to_update = postprocessed_batch[SampleBatch.CUR_OBS] - other_id = 1 if agent_id == 0 else 0 - action_encoder = ModelCatalog.get_preprocessor_for_space( - # Box(-np.inf, np.inf, (ACTION_VEC_SIZE,), np.float32) # Unbounded - Box(-1, 1, (ACTION_VEC_SIZE,), np.float32) # Bounded - ) - _, opponent_batch = original_batches[other_id] - # opponent_actions = np.array([action_encoder.transform(a) for a in opponent_batch[SampleBatch.ACTIONS]]) # Unbounded - opponent_actions = np.array([action_encoder.transform(np.clip(a, -1, 1)) for a in opponent_batch[SampleBatch.ACTIONS]]) # Bounded - to_update[:, -ACTION_VEC_SIZE:] = opponent_actions - -############################################################ -def central_critic_observer(agent_obs, **kw): - new_obs = { - 0: { - "own_obs": agent_obs[0], - "opponent_obs": agent_obs[1], - "opponent_action": np.zeros(ACTION_VEC_SIZE), # Filled in by FillInActions - }, - 1: { - "own_obs": agent_obs[1], - "opponent_obs": agent_obs[0], - "opponent_action": np.zeros(ACTION_VEC_SIZE), # Filled in by FillInActions - }, - } - return new_obs - -############################################################ -if __name__ == "__main__": - - #### Define and parse (optional) arguments for the script ## - parser = argparse.ArgumentParser(description='Multi-agent reinforcement learning experiments script') - parser.add_argument('--exp', type=str, help='The experiment folder written as ./results/save------', metavar='') - ARGS = parser.parse_args() - - #### Parameters to recreate the environment ################ - NUM_DRONES = int(ARGS.exp.split("-")[2]) - OBS = ObservationType.KIN if ARGS.exp.split("-")[4] == 'kin' else ObservationType.RGB - - # Parse ActionType instance from file name - action_name = ARGS.exp.split("-")[5] - ACT = [action for action in ActionType if action.value == action_name] - if len(ACT) != 1: - raise AssertionError("Result file could have gotten corrupted. 
Extracted action type does not match any of the existing ones.") - ACT = ACT.pop() - - #### Constants, and errors ################################# - if OBS == ObservationType.KIN: - OWN_OBS_VEC_SIZE = 12 - elif OBS == ObservationType.RGB: - print("[ERROR] ObservationType.RGB for multi-agent systems not yet implemented") - exit() - else: - print("[ERROR] unknown ObservationType") - exit() - if ACT in [ActionType.ONE_D_RPM, ActionType.ONE_D_DYN, ActionType.ONE_D_PID]: - ACTION_VEC_SIZE = 1 - elif ACT in [ActionType.RPM, ActionType.DYN, ActionType.VEL]: - ACTION_VEC_SIZE = 4 - elif ACT == ActionType.PID: - ACTION_VEC_SIZE = 3 - else: - print("[ERROR] unknown ActionType") - exit() - - #### Initialize Ray Tune ################################### - ray.shutdown() - ray.init(ignore_reinit_error=True) - - #### Register the custom centralized critic model ########## - ModelCatalog.register_custom_model("cc_model", CustomTorchCentralizedCriticModel) - - #### Register the environment ############################## - temp_env_name = "this-aviary-v0" - if ARGS.exp.split("-")[1] == 'flock': - register_env(temp_env_name, lambda _: FlockAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - ) - elif ARGS.exp.split("-")[1] == 'leaderfollower': - register_env(temp_env_name, lambda _: LeaderFollowerAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - ) - elif ARGS.exp.split("-")[1] == 'meetup': - register_env(temp_env_name, lambda _: MeetupAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - ) - else: - print("[ERROR] environment not yet implemented") - exit() - - #### Unused env to extract the act and obs spaces ########## - if ARGS.exp.split("-")[1] == 'flock': - temp_env = FlockAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - elif ARGS.exp.split("-")[1] == 'leaderfollower': - temp_env = LeaderFollowerAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - elif ARGS.exp.split("-")[1] == 'meetup': - temp_env = MeetupAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - else: - print("[ERROR] environment not yet implemented") - exit() - observer_space = Dict({ - "own_obs": temp_env.observation_space[0], - "opponent_obs": temp_env.observation_space[0], - "opponent_action": temp_env.action_space[0], - }) - action_space = temp_env.action_space[0] - - #### Set up the trainer's config ########################### - config = ppo.DEFAULT_CONFIG.copy() # For the default config, see github.com/ray-project/ray/blob/master/rllib/agents/trainer.py - config = { - "env": temp_env_name, - "num_workers": 0, #0+ARGS.workers, - "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0 - "batch_mode": "complete_episodes", - "callbacks": FillInActions, - "framework": "torch", - } - - #### Set up the model parameters of the trainer's config ### - config["model"] = { - "custom_model": "cc_model", - } - - #### Set up the multiagent params of the trainer's config ## - config["multiagent"] = { - "policies": { - "pol0": (None, observer_space, action_space, {"agent_id": 0,}), - "pol1": (None, observer_space, action_space, {"agent_id": 1,}), - }, - "policy_mapping_fn": lambda x: "pol0" if x == 0 else "pol1", # # Function mapping agent ids to 
policy ids - "observation_fn": central_critic_observer, # See rllib/evaluation/observation_function.py for more info - } - - #### Restore agent ######################################### - agent = ppo.PPOTrainer(config=config) - with open(ARGS.exp+'/checkpoint.txt', 'r+') as f: - checkpoint = f.read() - agent.restore(checkpoint) - - #### Extract and print policies ############################ - policy0 = agent.get_policy("pol0") - print("action model 0", policy0.model.action_model) - print("value model 0", policy0.model.value_model) - policy1 = agent.get_policy("pol1") - print("action model 1", policy1.model.action_model) - print("value model 1", policy1.model.value_model) - - #### Create test environment ############################### - if ARGS.exp.split("-")[1] == 'flock': - test_env = FlockAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT, - gui=True, - record=False - ) - elif ARGS.exp.split("-")[1] == 'leaderfollower': - test_env = LeaderFollowerAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT, - gui=True, - record=False - ) - elif ARGS.exp.split("-")[1] == 'meetup': - test_env = MeetupAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT, - gui=True, - record=False - ) - else: - print("[ERROR] environment not yet implemented") - exit() - - #### Show, record a video, and log the model's performance # - obs = test_env.reset() - logger = Logger(logging_freq_hz=int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS), - num_drones=NUM_DRONES - ) - if ACT in [ActionType.ONE_D_RPM, ActionType.ONE_D_DYN, ActionType.ONE_D_PID]: - action = {i: np.array([0]) for i in range(NUM_DRONES)} - elif ACT in [ActionType.RPM, ActionType.DYN, ActionType.VEL]: - action = {i: np.array([0, 0, 0, 0]) for i in range(NUM_DRONES)} - elif ACT==ActionType.PID: - action = {i: np.array([0, 0, 0]) for i in range(NUM_DRONES)} - else: - print("[ERROR] unknown ActionType") - exit() - start = time.time() - for i in range(6*int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS)): # Up to 6'' - #### Deploy the policies ################################### - temp = {} - temp[0] = policy0.compute_single_action(np.hstack([action[1], obs[1], obs[0]])) # Counterintuitive order, check params.json - temp[1] = policy1.compute_single_action(np.hstack([action[0], obs[0], obs[1]])) - action = {0: temp[0][0], 1: temp[1][0]} - obs, reward, done, info = test_env.step(action) - test_env.render() - if OBS==ObservationType.KIN: - for j in range(NUM_DRONES): - logger.log(drone=j, - timestamp=i/test_env.SIM_FREQ, - state= np.hstack([obs[j][0:3], np.zeros(4), obs[j][3:15], np.resize(action[j], (4))]), - control=np.zeros(12) - ) - sync(np.floor(i*test_env.AGGR_PHY_STEPS), start, test_env.TIMESTEP) - # if done["__all__"]: obs = test_env.reset() # OPTIONAL EPISODE HALT - test_env.close() - logger.save_as_csv("ma") # Optional CSV save - logger.plot() - - #### Shut down Ray ######################################### - ray.shutdown() diff --git a/experiments/learning/sa_script.bash b/experiments/sa_script.bash similarity index 100% rename from experiments/learning/sa_script.bash rename to experiments/sa_script.bash diff --git a/experiments/learning/sa_script.slrm b/experiments/sa_script.slrm similarity index 100% rename from experiments/learning/sa_script.slrm rename to experiments/sa_script.slrm diff --git a/experiments/learning/shared_constants.py b/experiments/shared_constants.py similarity index 100% 
rename from experiments/learning/shared_constants.py rename to experiments/shared_constants.py diff --git a/experiments/learning/singleagent.py b/experiments/singleagent.py similarity index 100% rename from experiments/learning/singleagent.py rename to experiments/singleagent.py diff --git a/experiments/learning/test_singleagent.py b/experiments/test_singleagent.py similarity index 100% rename from experiments/learning/test_singleagent.py rename to experiments/test_singleagent.py diff --git a/gym_pybullet_drones/envs/DynAviary.py b/gym_pybullet_drones/envs/DynAviary.py deleted file mode 100644 index 744aa9c8e..000000000 --- a/gym_pybullet_drones/envs/DynAviary.py +++ /dev/null @@ -1,219 +0,0 @@ -import numpy as np -from gym import spaces - -from gym_pybullet_drones.envs.BaseAviary import BaseAviary -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.utils.utils import nnlsRPM - -class DynAviary(BaseAviary): - """Multi-drone environment class for control with desired thrust and torques.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - num_drones: int=1, - neighbourhood_radius: float=np.inf, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - freq: int=240, - aggregate_phy_steps: int=1, - gui=False, - record=False, - obstacles=False, - user_debug_gui=True, - output_folder='results' - ): - """Initialization of an aviary controlled by desired thrust and torques. - - Attribute `dynamics_attributes` is automatically set to True when calling - the superclass `__init__()` method. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - num_drones : int, optional - The desired number of drones in the aviary. - neighbourhood_radius : float, optional - Radius used to compute the drones' adjacency matrix, in meters. - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - freq : int, optional - The frequency (Hz) at which the physics engine steps. - aggregate_phy_steps : int, optional - The number of physics steps within one call to `BaseAviary.step()`. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obstacles : bool, optional - Whether to add obstacles to the simulation. - user_debug_gui : bool, optional - Whether to draw the drones' axes and the GUI RPMs sliders. - - """ - super().__init__(drone_model=drone_model, - num_drones=num_drones, - neighbourhood_radius=neighbourhood_radius, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - freq=freq, - aggregate_phy_steps=aggregate_phy_steps, - gui=gui, - record=record, - obstacles=obstacles, - user_debug_gui=user_debug_gui, - dynamics_attributes=True, - output_folder=output_folder - ) - - - ################################################################################ - - def _actionSpace(self): - """Returns the action space of the environment. - - Returns - ------- - dict[str, ndarray] - A Dict of Box(4,) with NUM_DRONES entries, - indexed by drone Id in string format. 
- - """ - #### Action vector ######## Thrust X Torque Y Torque Z Torque - act_lower_bound = np.array([0., -self.MAX_XY_TORQUE, -self.MAX_XY_TORQUE, -self.MAX_Z_TORQUE]) - act_upper_bound = np.array([self.MAX_THRUST, self.MAX_XY_TORQUE, self.MAX_XY_TORQUE, self.MAX_Z_TORQUE]) - return spaces.Dict({str(i): spaces.Box(low=act_lower_bound, - high=act_upper_bound, - dtype=np.float32 - ) for i in range(self.NUM_DRONES)}) - - ################################################################################ - - def _observationSpace(self): - """Returns the observation space of the environment. - - Returns - ------- - dict[str, dict[str, ndarray]] - A Dict with NUM_DRONES entries indexed by Id in string format, - each a Dict in the form {Box(20,), MultiBinary(NUM_DRONES)}. - - """ - #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3 - obs_lower_bound = np.array([-np.inf, -np.inf, 0., -1., -1., -1., -1., -np.pi, -np.pi, -np.pi, -np.inf, -np.inf, -np.inf, -np.inf, -np.inf, -np.inf, 0., 0., 0., 0.]) - obs_upper_bound = np.array([np.inf, np.inf, np.inf, 1., 1., 1., 1., np.pi, np.pi, np.pi, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, self.MAX_RPM, self.MAX_RPM, self.MAX_RPM, self.MAX_RPM]) - return spaces.Dict({str(i): spaces.Dict({"state": spaces.Box(low=obs_lower_bound, - high=obs_upper_bound, - dtype=np.float32 - ), - "neighbors": spaces.MultiBinary(self.NUM_DRONES) - }) for i in range(self.NUM_DRONES)}) - - ################################################################################ - - def _computeObs(self): - """Returns the current observation of the environment. - - For the value of key "state", see the implementation of `_getDroneStateVector()`, - the value of key "neighbors" is the drone's own row of the adjacency matrix. - - Returns - ------- - dict[str, dict[str, ndarray]] - A Dict with NUM_DRONES entries indexed by Id in string format, - each a Dict in the form {Box(20,), MultiBinary(NUM_DRONES)}. - - """ - adjacency_mat = self._getAdjacencyMatrix() - return {str(i): {"state": self._getDroneStateVector(i), "neighbors": adjacency_mat[i,:]} for i in range(self.NUM_DRONES) } - - ################################################################################ - - def _preprocessAction(self, - action - ): - """Pre-processes the action passed to `.step()` into motors' RPMs. - - Solves desired thrust and torques using NNLS and converts a dictionary into a 2D array. - - Parameters - ---------- - action : dict[str, ndarray] - The input action each drone (desired thrust and torques), to be translated into RPMs. - - Returns - ------- - ndarray - (NUM_DRONES, 4)-shaped array of ints containing to clipped RPMs - commanded to the 4 motors of each drone. - - """ - clipped_action = np.zeros((self.NUM_DRONES, 4)) - for k, v in action.items(): - clipped_action[int(k), :] = nnlsRPM(thrust=v[0], - x_torque=v[1], - y_torque=v[2], - z_torque=v[3], - counter=self.step_counter, - max_thrust=self.MAX_THRUST, - max_xy_torque=self.MAX_XY_TORQUE, - max_z_torque=self.MAX_Z_TORQUE, - a=self.A, - inv_a=self.INV_A, - b_coeff=self.B_COEFF, - gui=self.GUI - ) - return clipped_action - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value(s). - - Unused as this subclass is not meant for reinforcement learning. - - Returns - ------- - int - Dummy value. 
- - """ - return -1 - - ################################################################################ - - def _computeDone(self): - """Computes the current done value(s). - - Unused as this subclass is not meant for reinforcement learning. - - Returns - ------- - bool - Dummy value. - - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused as this subclass is not meant for reinforcement learning. - - Returns - ------- - dict[str, int] - Dummy value. - - """ - return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years diff --git a/gym_pybullet_drones/envs/RLAviary.py b/gym_pybullet_drones/envs/RLAviary.py new file mode 100644 index 000000000..bbf5bdb4b --- /dev/null +++ b/gym_pybullet_drones/envs/RLAviary.py @@ -0,0 +1,2 @@ +'''TBD +''' diff --git a/gym_pybullet_drones/examples/compare.py b/gym_pybullet_drones/examples/compare.py deleted file mode 100644 index 96c0e5be9..000000000 --- a/gym_pybullet_drones/examples/compare.py +++ /dev/null @@ -1,140 +0,0 @@ -"""Script comparing a gym_pybullet_drones simulation to a trace file. - -It is meant to compare/validate the behaviour of a pyhsics implementation. - -Example -------- -In a terminal, run as: - - $ python compare.py - -Notes ------ -The comparison is along a 2D trajectory in the X-Z plane, between x == +1 and -1. - -""" -import time -import argparse -import pickle -import numpy as np -import pkg_resources - -from gym_pybullet_drones.utils.utils import sync, str2bool -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary -from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl -from gym_pybullet_drones.utils.Logger import Logger - -DEFAULT_PHYICS = Physics('pyb') -DEFAULT_GUI = False -DEFAULT_RECORD_VIDEO = False -DEFAULT_TRACE_FILE = pkg_resources.resource_filename('gym_pybullet_drones', 'assets/example_trace.pkl') -DEFAULT_OUTPUT_FOLDER = 'results' -DEFAULT_COLAB = False - -def run( - physics=DEFAULT_PHYICS, - gui=DEFAULT_GUI, - record_video=DEFAULT_RECORD_VIDEO, - trace_file=DEFAULT_TRACE_FILE, - output_folder=DEFAULT_OUTPUT_FOLDER, - plot=True, - colab=DEFAULT_COLAB - ): - #### Load a trace and control reference from a .pkl file ### - with open(trace_file, 'rb') as in_file: - TRACE_TIMESTAMPS, TRACE_DATA, TRACE_CTRL_REFERENCE, _, _, _ = pickle.load(in_file) - - #### Compute trace's parameters ############################ - DURATION_SEC = int(TRACE_TIMESTAMPS[-1]) - SIMULATION_FREQ_HZ = int(len(TRACE_TIMESTAMPS)/TRACE_TIMESTAMPS[-1]) - - #### Initialize the simulation ############################# - env = CtrlAviary(drone_model=DroneModel.CF2X, - num_drones=1, - initial_xyzs=np.array([0, 0, .1]).reshape(1, 3), - physics=physics, - freq=SIMULATION_FREQ_HZ, - gui=gui, - record=record_video, - obstacles=False - ) - INITIAL_STATE = env.reset() - action = {"0": np.zeros(4)} - pos_err = 9999. 
- - #### TRACE_FILE starts at [0,0,0], sim starts at [0,0,INITIAL_STATE[2]] - TRACE_CTRL_REFERENCE[:, 2] = INITIAL_STATE["0"]["state"][2] - - #### Initialize the logger ################################# - logger = Logger(logging_freq_hz=SIMULATION_FREQ_HZ, - num_drones=2, - duration_sec=DURATION_SEC, - output_folder=output_folder, - colab=colab - ) - - #### Initialize the controller ############################# - ctrl = DSLPIDControl(drone_model=env.DRONE_MODEL) - - #### Run the comparison #################################### - START = time.time() - for i in range(DURATION_SEC*env.SIM_FREQ): - - #### Step the simulation ################################### - obs, reward, done, info = env.step(action) - - #### Compute next action using the set points from the trace - action["0"], pos_err, yaw_err = ctrl.computeControlFromState(control_timestep=env.TIMESTEP, - state=obs["0"]["state"], - target_pos=TRACE_CTRL_REFERENCE[i, 0:3], - target_vel=TRACE_CTRL_REFERENCE[i, 3:6] - ) - - #### Re-arrange the trace for consistency with the logger - trace_obs = np.hstack([TRACE_DATA[i, 0:3], np.zeros(4), TRACE_DATA[i, 6:9], TRACE_DATA[i, 3:6], TRACE_DATA[i, 9:12], TRACE_DATA[i, 12:16]]) - - #### Log the trace ######################################### - logger.log(drone=0, - timestamp=TRACE_TIMESTAMPS[i], - state=trace_obs, - control=np.hstack([TRACE_CTRL_REFERENCE[i, :], np.zeros(6)]) - ) - - #### Log the simulation #################################### - logger.log(drone=1, - timestamp=i/env.SIM_FREQ, - state=obs["0"]["state"], - control=np.hstack([TRACE_CTRL_REFERENCE[i, :], np.zeros(6)]) - ) - - #### Printout ############################################## - if i%env.SIM_FREQ == 0: - env.render() - - #### Sync the simulation ################################### - if gui: - sync(i, START, env.TIMESTEP) - - #### Close the environment ################################# - env.close() - - #### Save the simulation results ########################### - logger.save() - - #### Plot the simulation results ########################### - if plot: - logger.plot(pwm=True) - -if __name__ == "__main__": - #### Define and parse (optional) arguments for the script ## - parser = argparse.ArgumentParser(description='Trace comparison script using CtrlAviary and DSLPIDControl') - parser.add_argument('--physics', default=DEFAULT_PHYICS, type=Physics, help='Physics updates (default: PYB)', metavar='', choices=Physics) - parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: False)', metavar='') - parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: False)', metavar='') - parser.add_argument('--trace_file', default=DEFAULT_TRACE_FILE, type=str, help='Pickle file with the trace to compare to (default: "example_trace.pkl")', metavar='') - parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='') - parser.add_argument('--colab', default=DEFAULT_COLAB, type=bool, help='Whether example is being run by a notebook (default: "False")', metavar='') - ARGS = parser.parse_args() - - run(**vars(ARGS)) \ No newline at end of file diff --git a/gym_pybullet_drones/examples/groundeffect.py b/gym_pybullet_drones/examples/groundeffect.py deleted file mode 100644 index 52fe3f2f3..000000000 --- a/gym_pybullet_drones/examples/groundeffect.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Script demonstrating the ground effect contribution. 
- -The simulation is run by a `CtrlAviary` environment. - -Example -------- -In a terminal, run as: - - $ python groundeffect.py - -Notes ------ -The drone altitude tracks a sinusoid, near the ground plane. - -""" -import os -import time -import argparse -from datetime import datetime -import pdb -import math -import random -import numpy as np -import pybullet as p -import matplotlib.pyplot as plt - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary -from gym_pybullet_drones.envs.VisionAviary import VisionAviary -from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl -from gym_pybullet_drones.control.SimplePIDControl import SimplePIDControl -from gym_pybullet_drones.utils.Logger import Logger -from gym_pybullet_drones.utils.utils import sync, str2bool - -DEFAULT_GUI = True -DEFAULT_RECORD_VIDEO = False -DEFAULT_PLOT = True -DEFAULT_USER_DEBUG_GUI = False -DEFAULT_AGGREGATE = True -DEFAULT_OBSTACLES = True -DEFAULT_SIMULATION_FREQ_HZ = 240 -DEFAULT_CONTROL_FREQ_HZ = 240 -DEFAULT_DURATION_SEC = 4 -DEFAULT_OUTPUT_FOLDER = 'results' -DEFAULT_COLAB = False - -def run( - gui=DEFAULT_GUI, - record_video=DEFAULT_RECORD_VIDEO, - plot=DEFAULT_PLOT, - user_debug_gui=DEFAULT_USER_DEBUG_GUI, - aggregate=DEFAULT_AGGREGATE, - obstacles=DEFAULT_OBSTACLES, - simulation_freq_hz=DEFAULT_SIMULATION_FREQ_HZ, - control_freq_hz=DEFAULT_CONTROL_FREQ_HZ, - duration_sec=DEFAULT_DURATION_SEC, - output_folder=DEFAULT_OUTPUT_FOLDER, - colab=DEFAULT_COLAB - ): - - #### Initialize the simulation ############################# - INIT_XYZ = np.array([0, 0, 0.014]).reshape(1,3) - AGGR_PHY_STEPS = int(simulation_freq_hz/control_freq_hz) if aggregate else 1 - - #### Initialize a vertical trajectory ###################### - PERIOD = 4 - NUM_WP = control_freq_hz*PERIOD - TARGET_POS = np.zeros((NUM_WP,3)) - for i in range(NUM_WP): - TARGET_POS[i, :] = INIT_XYZ[0, 0], INIT_XYZ[0, 1], INIT_XYZ[0, 2] + 0.15 * (np.sin((i/NUM_WP)*(2*np.pi)) + 1) - wp_counter = 0 - - #### Create the environment ################################ - env = CtrlAviary(drone_model=DroneModel.CF2X, - num_drones=1, - initial_xyzs=INIT_XYZ, - physics=Physics.PYB_GND, - # physics=Physics.PYB, # For comparison - neighbourhood_radius=10, - freq=simulation_freq_hz, - aggregate_phy_steps=AGGR_PHY_STEPS, - gui=gui, - record=record_video, - obstacles=obstacles, - user_debug_gui=user_debug_gui - ) - - #### Obtain the PyBullet Client ID from the environment #### - PYB_CLIENT = env.getPyBulletClient() - - #### Initialize the logger ################################# - logger = Logger(logging_freq_hz=int(simulation_freq_hz/AGGR_PHY_STEPS), - num_drones=1, - output_folder=output_folder, - colab=colab - ) - - #### Initialize the controller ############################# - ctrl = DSLPIDControl(drone_model=DroneModel.CF2X) - - #### Run the simulation #################################### - CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ/control_freq_hz)) - action = {"0": np.array([0,0,0,0])} - START = time.time() - for i in range(0, int(duration_sec*env.SIM_FREQ), AGGR_PHY_STEPS): - - #### Make it rain rubber ducks ############################# - # if i/env.SIM_FREQ>5 and i%10==0 and i/env.SIM_FREQ<10: p.loadURDF("duck_vhacd.urdf", [0+random.gauss(0, 0.3),-0.5+random.gauss(0, 0.3),3], p.getQuaternionFromEuler([random.randint(0,360),random.randint(0,360),random.randint(0,360)]), physicsClientId=PYB_CLIENT) - - #### Step the simulation ################################### - obs, reward, done, info 
= env.step(action) - - #### Compute control at the desired frequency ############## - if i%CTRL_EVERY_N_STEPS == 0: - - #### Compute control for the current way point ############# - action["0"], _, _ = ctrl.computeControlFromState(control_timestep=CTRL_EVERY_N_STEPS*env.TIMESTEP, - state=obs["0"]["state"], - target_pos=TARGET_POS[wp_counter, :], - ) - - #### Go to the next way point and loop ##################### - wp_counter = wp_counter + 1 if wp_counter < (NUM_WP-1) else 0 - - #### Log the simulation #################################### - logger.log(drone=0, - timestamp=i/env.SIM_FREQ, - state= obs["0"]["state"], - control=np.hstack([TARGET_POS[wp_counter, :], np.zeros(9)]) - ) - - #### Printout ############################################## - if i%env.SIM_FREQ == 0: - env.render() - - #### Sync the simulation ################################### - if gui: - sync(i, START, env.TIMESTEP) - - #### Close the environment ################################# - env.close() - - #### Save the simulation results ########################### - logger.save() - logger.save_as_csv("gnd") # Optional CSV save - - #### Plot the simulation results ########################### - if plot: - logger.plot() - -if __name__ == "__main__": - #### Define and parse (optional) arguments for the script ## - parser = argparse.ArgumentParser(description='Ground effect script using CtrlAviary and DSLPIDControl') - parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='') - parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: False)', metavar='') - parser.add_argument('--plot', default=DEFAULT_PLOT, type=str2bool, help='Whether to plot the simulation results (default: True)', metavar='') - parser.add_argument('--user_debug_gui', default=DEFAULT_USER_DEBUG_GUI, type=str2bool, help='Whether to add debug lines and parameters to the GUI (default: False)', metavar='') - parser.add_argument('--aggregate', default=DEFAULT_AGGREGATE, type=str2bool, help='Whether to aggregate physics steps (default: False)', metavar='') - parser.add_argument('--obstacles', default=DEFAULT_OBSTACLES, type=str2bool, help='Whether to add obstacles to the environment (default: True)', metavar='') - parser.add_argument('--simulation_freq_hz', default=DEFAULT_SIMULATION_FREQ_HZ, type=int, help='Simulation frequency in Hz (default: 240)', metavar='') - parser.add_argument('--control_freq_hz', default=DEFAULT_CONTROL_FREQ_HZ, type=int, help='Control frequency in Hz (default: 48)', metavar='') - parser.add_argument('--duration_sec', default=DEFAULT_DURATION_SEC, type=int, help='Duration of the simulation in seconds (default: 5)', metavar='') - parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='') - parser.add_argument('--colab', default=DEFAULT_COLAB, type=bool, help='Whether example is being run by a notebook (default: "False")', metavar='') - ARGS = parser.parse_args() - - run(**vars(ARGS)) diff --git a/gym_pybullet_drones/examples/learn.py b/gym_pybullet_drones/examples/learning/learn.py similarity index 100% rename from gym_pybullet_drones/examples/learn.py rename to gym_pybullet_drones/examples/learning/learn.py diff --git a/gym_pybullet_drones/examples/learning/sa_script.bash b/gym_pybullet_drones/examples/learning/sa_script.bash new file mode 100755 index 000000000..1bfd99a82 --- /dev/null +++ 
b/gym_pybullet_drones/examples/learning/sa_script.bash @@ -0,0 +1,23 @@ +#!/bin/bash + +# declare -a env_list=( 'takeoff' 'hover' 'flythrugate' ) +# declare -a algo_list=( 'a2c' 'ppo' 'sac' 'td3' 'ddpg' ) +# declare -a obs_list=( 'kin' 'rgb' ) +# declare -a act_list=( 'rpm' 'dyn' 'pid' 'one_d_rpm' 'one_d_dyn' 'one_d_pid' ) + +# Note: 3*5*2*2=60 jobs; do not launch at once + +declare -a env_list=( 'hover' ) +declare -a algo_list=( 'ppo' 'sac' 'ddpg' ) +declare -a obs_list=( 'kin' 'rgb' ) +declare -a act_list=( 'one_d_rpm' ) + +for env in ${env_list[@]}; do + for algo in ${algo_list[@]}; do + for obs in ${obs_list[@]}; do + for act in ${act_list[@]}; do + sbatch --export=env=$env,algo=$algo,obs=$obs,act=$act sa_script.slrm + done + done + done +done diff --git a/gym_pybullet_drones/examples/learning/sa_script.slrm b/gym_pybullet_drones/examples/learning/sa_script.slrm new file mode 100644 index 000000000..1e9464ba0 --- /dev/null +++ b/gym_pybullet_drones/examples/learning/sa_script.slrm @@ -0,0 +1,53 @@ +#!/bin/bash + +# Use as sbatch --export=env="takeoff",algo="a2c",obs="kin",act="rpm" sa_script.slrm + +#SBATCH --job-name=pyb_drones +#SBATCH --output=/scratch/gobi1/jacopo/gym-pybullet-drones/experiments/learning/results/output-%N-%j.out # For output and error, use absolute paths that exists already +#SBATCH --error=/scratch/gobi1/jacopo/gym-pybullet-drones/experiments/learning/results/error-%N-%j.err # %N and %j will be replaced by the host name and job id, respectively +#SBATCH --open-mode=append +#SBATCH --partition=cpu # self-explanatory, set to your preference (e.g. gpu or cpu on MaRS, p100, t4, or cpu on Vaughan) +#SBATCH --cpus-per-task=16 # self-explanatory, set to your preference +#SBATCH --ntasks-per-node=1 +#SBATCH --mem=64G # self-explanatory, set to your preference +#SBATCH --gres=gpu:0 # NOTE: you need a GPU for CUDA support; self-explanatory, set to your preference +#SBATCH --nodes=1 +#SBATCH --qos=normal # for "high" and "deadline" QoS, refer to https://support.vectorinstitute.ai/AboutVaughan2 + +now=$(date +"%Y_%m_%d-%H_%M_%S") # current date + +log_file="/scratch/gobi1/jacopo/gym-pybullet-drones/experiments/learning/results/log-$env-$algo-$obs-$act-3D-$now.log" + +echo $SLURM_JOB_PARTITION >> $log_file # log the job partition + +echo $SLURM_JOB_ID >> $log_file # log the job id + +export LD_LIBRARY_PATH=/pkgs/cuda-10.1/lib64:/pkgs/cudnn-10.1-v7.6.3.30/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} # required for TensorFlow to see any GPU + +export PATH=/pkgs/anaconda3/bin:$PATH # exporting the binaries of your preferred version of conda + +source activate /scratch/gobi1/$USER/learning/ # NOTE: set the path to your conda environment path + +echo $CONDA_PREFIX >> $log_file # log the active conda environment + +python --version >> $log_file # log Python version + +# pip install --user --upgrade gym +# pip install --user --upgrade ray[rllib] +# pip install --user --upgrade pybullet +# pip install --user --upgrade stable-baselines3 + +cd /scratch/gobi1/$USER/gym-pybullet-drones/ +pip install --user --upgrade --verbose --editable . 
+ +if [ "$algo" = "a2c" ] || [ "$algo" = "ppo" ]; then + + python /scratch/gobi1/$USER/gym-pybullet-drones/experiments/learning/singleagent.py --env $env --algo $algo --obs $obs --act $act --cpu 4 >> $log_file # the script, with its standard output appended to gym_py_drones_job.log + +else + + python /scratch/gobi1/$USER/gym-pybullet-drones/experiments/learning/singleagent.py --env $env --algo $algo --obs $obs --act $act --cpu 1 >> $log_file # the script, with its standard output appended to gym_py_drones_job.log + +fi + +source deactivate diff --git a/gym_pybullet_drones/examples/learning/shared_constants.py b/gym_pybullet_drones/examples/learning/shared_constants.py new file mode 100644 index 000000000..f62086b9a --- /dev/null +++ b/gym_pybullet_drones/examples/learning/shared_constants.py @@ -0,0 +1,2 @@ +AGGR_PHY_STEPS = 5 +"""int: Aggregate PyBullet/physics steps within a single call to env.step().""" diff --git a/gym_pybullet_drones/examples/learning/singleagent.py b/gym_pybullet_drones/examples/learning/singleagent.py new file mode 100644 index 000000000..15ea0c9a3 --- /dev/null +++ b/gym_pybullet_drones/examples/learning/singleagent.py @@ -0,0 +1,283 @@ +"""Learning script for single agent problems. + +Agents are based on `stable_baselines3`'s implementation of A2C, PPO SAC, TD3, DDPG. + +Example +------- +To run the script, type in a terminal: + + $ python singleagent.py --env --algo --obs --act --cpu + +Notes +----- +Use: + + $ tensorboard --logdir ./results/save-----/tb/ + +To check the tensorboard results at: + + http://localhost:6006/ + +""" +import os +import time +from datetime import datetime +from sys import platform +import argparse +import subprocess +import numpy as np +import gym +import torch +from stable_baselines3.common.env_checker import check_env +from stable_baselines3.common.cmd_util import make_vec_env # Module cmd_util will be renamed to env_util https://github.com/DLR-RM/stable-baselines3/pull/197 +from stable_baselines3.common.vec_env import SubprocVecEnv, VecTransposeImage +from stable_baselines3.common.utils import set_random_seed +from stable_baselines3 import A2C +from stable_baselines3 import PPO +from stable_baselines3 import SAC +from stable_baselines3 import TD3 +from stable_baselines3 import DDPG +from stable_baselines3.common.policies import ActorCriticPolicy as a2cppoMlpPolicy +from stable_baselines3.common.policies import ActorCriticCnnPolicy as a2cppoCnnPolicy +from stable_baselines3.sac.policies import SACPolicy as sacMlpPolicy +from stable_baselines3.sac import CnnPolicy as sacCnnPolicy +from stable_baselines3.td3 import MlpPolicy as td3ddpgMlpPolicy +from stable_baselines3.td3 import CnnPolicy as td3ddpgCnnPolicy +from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback, StopTrainingOnRewardThreshold + +from gym_pybullet_drones.envs.single_agent_rl.TakeoffAviary import TakeoffAviary +from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary +from gym_pybullet_drones.envs.single_agent_rl.TuneAviary import TuneAviary +from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType + +import shared_constants + +EPISODE_REWARD_THRESHOLD = -0 # Upperbound: rewards are always negative, but non-zero +"""float: Reward threshold to halt the script.""" + +DEFAULT_ENV = 'hover' +DEFAULT_ALGO = 'ppo' +DEFAULT_OBS = ObservationType('kin') +DEFAULT_ACT = ActionType('one_d_rpm') 
+DEFAULT_CPU = 1 +DEFAULT_STEPS = 35000 +DEFAULT_OUTPUT_FOLDER = 'results' + +def run( + env=DEFAULT_ENV, + algo=DEFAULT_ALGO, + obs=DEFAULT_OBS, + act=DEFAULT_ACT, + cpu=DEFAULT_CPU, + steps=DEFAULT_STEPS, + output_folder=DEFAULT_OUTPUT_FOLDER +): + + #### Save directory ######################################## + filename = os.path.join(output_folder, 'save-'+env+'-'+algo+'-'+obs.value+'-'+act.value+'-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S")) + if not os.path.exists(filename): + os.makedirs(filename+'/') + + #### Print out current git commit hash ##################### + if (platform == "linux" or platform == "darwin") and ('GITHUB_ACTIONS' not in os.environ.keys()): + git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip() + with open(filename+'/git_commit.txt', 'w+') as f: + f.write(str(git_commit)) + + #### Warning ############################################### + if env == 'tune' and act != ActionType.TUN: + print("\n\n\n[WARNING] TuneAviary is intended for use with ActionType.TUN\n\n\n") + if act == ActionType.ONE_D_RPM or act == ActionType.ONE_D_DYN or act == ActionType.ONE_D_PID: + print("\n\n\n[WARNING] Simplified 1D problem for debugging purposes\n\n\n") + #### Errors ################################################ + if not env in ['takeoff', 'hover']: + print("[ERROR] 1D action space is only compatible with Takeoff and HoverAviary") + exit() + if act == ActionType.TUN and env != 'tune' : + print("[ERROR] ActionType.TUN is only compatible with TuneAviary") + exit() + if algo in ['sac', 'td3', 'ddpg'] and cpu!=1: + print("[ERROR] The selected algorithm does not support multiple environments") + exit() + + #### Uncomment to debug slurm scripts ###################### + # exit() + + env_name = env+"-aviary-v0" + sa_env_kwargs = dict(aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act) + # train_env = gym.make(env_name, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act) # single environment instead of a vectorized one + if env_name == "takeoff-aviary-v0": + train_env = make_vec_env(TakeoffAviary, + env_kwargs=sa_env_kwargs, + n_envs=cpu, + seed=0 + ) + if env_name == "hover-aviary-v0": + train_env = make_vec_env(HoverAviary, + env_kwargs=sa_env_kwargs, + n_envs=cpu, + seed=0 + ) + if env_name == "flythrugate-aviary-v0": + train_env = make_vec_env(FlyThruGateAviary, + env_kwargs=sa_env_kwargs, + n_envs=cpu, + seed=0 + ) + if env_name == "tune-aviary-v0": + train_env = make_vec_env(TuneAviary, + env_kwargs=sa_env_kwargs, + n_envs=cpu, + seed=0 + ) + print("[INFO] Action space:", train_env.action_space) + print("[INFO] Observation space:", train_env.observation_space) + # check_env(train_env, warn=True, skip_render_check=True) + + #### On-policy algorithms ################################## + onpolicy_kwargs = dict(activation_fn=torch.nn.ReLU, + net_arch=[512, 512, dict(vf=[256, 128], pi=[256, 128])] + ) # or None + if algo == 'a2c': + model = A2C(a2cppoMlpPolicy, + train_env, + policy_kwargs=onpolicy_kwargs, + tensorboard_log=filename+'/tb/', + verbose=1 + ) if obs == ObservationType.KIN else A2C(a2cppoCnnPolicy, + train_env, + policy_kwargs=onpolicy_kwargs, + tensorboard_log=filename+'/tb/', + verbose=1 + ) + if algo == 'ppo': + model = PPO(a2cppoMlpPolicy, + train_env, + policy_kwargs=onpolicy_kwargs, + tensorboard_log=filename+'/tb/', + verbose=1 + ) if obs == ObservationType.KIN else PPO(a2cppoCnnPolicy, + train_env, + policy_kwargs=onpolicy_kwargs, + tensorboard_log=filename+'/tb/', + verbose=1 + ) + + #### Off-policy 
+    #### Off-policy algorithms #################################
+    offpolicy_kwargs = dict(activation_fn=torch.nn.ReLU,
+                            net_arch=[512, 512, 256, 128]
+                            ) # or None # or dict(net_arch=dict(qf=[256, 128, 64, 32], pi=[256, 128, 64, 32]))
+    if algo == 'sac':
+        model = SAC(sacMlpPolicy,
+                    train_env,
+                    policy_kwargs=offpolicy_kwargs,
+                    tensorboard_log=filename+'/tb/',
+                    verbose=1
+                    ) if obs==ObservationType.KIN else SAC(sacCnnPolicy,
+                                                           train_env,
+                                                           policy_kwargs=offpolicy_kwargs,
+                                                           tensorboard_log=filename+'/tb/',
+                                                           verbose=1
+                                                           )
+    if algo == 'td3':
+        model = TD3(td3ddpgMlpPolicy,
+                    train_env,
+                    policy_kwargs=offpolicy_kwargs,
+                    tensorboard_log=filename+'/tb/',
+                    verbose=1
+                    ) if obs==ObservationType.KIN else TD3(td3ddpgCnnPolicy,
+                                                           train_env,
+                                                           policy_kwargs=offpolicy_kwargs,
+                                                           tensorboard_log=filename+'/tb/',
+                                                           verbose=1
+                                                           )
+    if algo == 'ddpg':
+        model = DDPG(td3ddpgMlpPolicy,
+                     train_env,
+                     policy_kwargs=offpolicy_kwargs,
+                     tensorboard_log=filename+'/tb/',
+                     verbose=1
+                     ) if obs==ObservationType.KIN else DDPG(td3ddpgCnnPolicy,
+                                                             train_env,
+                                                             policy_kwargs=offpolicy_kwargs,
+                                                             tensorboard_log=filename+'/tb/',
+                                                             verbose=1
+                                                             )
+
+    #### Create evaluation environment #########################
+    if obs == ObservationType.KIN:
+        eval_env = gym.make(env_name,
+                            aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
+                            obs=obs,
+                            act=act
+                            )
+    elif obs == ObservationType.RGB:
+        if env_name == "takeoff-aviary-v0":
+            eval_env = make_vec_env(TakeoffAviary,
+                                    env_kwargs=sa_env_kwargs,
+                                    n_envs=1,
+                                    seed=0
+                                    )
+        if env_name == "hover-aviary-v0":
+            eval_env = make_vec_env(HoverAviary,
+                                    env_kwargs=sa_env_kwargs,
+                                    n_envs=1,
+                                    seed=0
+                                    )
+        if env_name == "flythrugate-aviary-v0":
+            eval_env = make_vec_env(FlyThruGateAviary,
+                                    env_kwargs=sa_env_kwargs,
+                                    n_envs=1,
+                                    seed=0
+                                    )
+        if env_name == "tune-aviary-v0":
+            eval_env = make_vec_env(TuneAviary,
+                                    env_kwargs=sa_env_kwargs,
+                                    n_envs=1,
+                                    seed=0
+                                    )
+        eval_env = VecTransposeImage(eval_env)
+
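+    # The EvalCallback below periodically runs the current policy on eval_env,
+    # writes evaluations.npz and best_model.zip into the save directory, and
+    # (via callback_on_new_best) stops training early once the mean evaluation
+    # reward reaches EPISODE_REWARD_THRESHOLD. eval_freq is counted per
+    # vectorized environment, hence the division by the number of envs (cpu).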
+    #### Train the model #######################################
+    # checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=filename+'-logs/', name_prefix='rl_model')
+    callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=EPISODE_REWARD_THRESHOLD,
+                                                     verbose=1
+                                                     )
+    eval_callback = EvalCallback(eval_env,
+                                 callback_on_new_best=callback_on_best,
+                                 verbose=1,
+                                 best_model_save_path=filename+'/',
+                                 log_path=filename+'/',
+                                 eval_freq=int(2000/cpu),
+                                 deterministic=True,
+                                 render=False
+                                 )
+    model.learn(total_timesteps=steps, #int(1e12),
+                callback=eval_callback,
+                log_interval=100,
+                )
+
+    #### Save the model ########################################
+    model.save(filename+'/success_model.zip')
+    print(filename)
+
+    #### Print training progression ############################
+    with np.load(filename+'/evaluations.npz') as data:
+        for j in range(data['timesteps'].shape[0]):
+            print(str(data['timesteps'][j])+","+str(data['results'][j][0]))
+
+
+if __name__ == "__main__":
+    #### Define and parse (optional) arguments for the script ##
+    parser = argparse.ArgumentParser(description='Single agent reinforcement learning experiments script')
+    parser.add_argument('--env', default=DEFAULT_ENV, type=str, choices=['takeoff', 'hover', 'flythrugate', 'tune'], help='Task (default: hover)', metavar='')
+    parser.add_argument('--algo', default=DEFAULT_ALGO, type=str, choices=['a2c', 'ppo', 'sac', 'td3', 'ddpg'], help='RL agent (default: ppo)', metavar='')
+    parser.add_argument('--obs', default=DEFAULT_OBS, type=ObservationType, help='Observation space (default: kin)', metavar='')
+    parser.add_argument('--act', default=DEFAULT_ACT, type=ActionType, help='Action space (default: one_d_rpm)', metavar='')
+    parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='')
+    parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='')
+    parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
+    ARGS = parser.parse_args()
+
+    run(**vars(ARGS))
\ No newline at end of file
diff --git a/gym_pybullet_drones/examples/learning/test_singleagent.py b/gym_pybullet_drones/examples/learning/test_singleagent.py
new file mode 100644
index 000000000..dc7e8c033
--- /dev/null
+++ b/gym_pybullet_drones/examples/learning/test_singleagent.py
@@ -0,0 +1,141 @@
+"""Test script for single agent problems.
+
+This script runs the best model found by one of the executions of `singleagent.py`.
+
+Example
+-------
+To run the script, type in a terminal:
+
+    $ python test_singleagent.py --exp ./results/save-<env>-<algo>-<obs>-<act>-<time_date>
+
+"""
+import os
+import time
+from datetime import datetime
+import argparse
+import re
+import numpy as np
+import gym
+import torch
+from stable_baselines3.common.env_checker import check_env
+from stable_baselines3 import A2C
+from stable_baselines3 import PPO
+from stable_baselines3 import SAC
+from stable_baselines3 import TD3
+from stable_baselines3 import DDPG
+from stable_baselines3.common.policies import ActorCriticPolicy as a2cppoMlpPolicy
+from stable_baselines3.common.policies import ActorCriticCnnPolicy as a2cppoCnnPolicy
+from stable_baselines3.sac.policies import SACPolicy as sacMlpPolicy
+from stable_baselines3.sac import CnnPolicy as sacCnnPolicy
+from stable_baselines3.td3 import MlpPolicy as td3ddpgMlpPolicy
+from stable_baselines3.td3 import CnnPolicy as td3ddpgCnnPolicy
+from stable_baselines3.common.evaluation import evaluate_policy
+
+from gym_pybullet_drones.utils.utils import sync
+from gym_pybullet_drones.utils.Logger import Logger
+from gym_pybullet_drones.envs.single_agent_rl.TakeoffAviary import TakeoffAviary
+from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary
+from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary
+from gym_pybullet_drones.envs.single_agent_rl.TuneAviary import TuneAviary
+from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType
+from gym_pybullet_drones.utils.utils import sync, str2bool
+
+import shared_constants
+
+DEFAULT_GUI = True
+DEFAULT_PLOT = True
+DEFAULT_OUTPUT_FOLDER = 'results'
+
+def run(exp, gui=DEFAULT_GUI, plot=DEFAULT_PLOT, output_folder=DEFAULT_OUTPUT_FOLDER):
+    #### Load the model from file ##############################
+    algo = exp.split("-")[2]
+
+    if os.path.isfile(exp+'/success_model.zip'):
+        path = exp+'/success_model.zip'
+    elif os.path.isfile(exp+'/best_model.zip'):
+        path = exp+'/best_model.zip'
+    else:
+        print("[ERROR]: no model under the specified path", exp)
+        exit()
+    if algo == 'a2c':
+        model = A2C.load(path)
+    if algo == 'ppo':
+        model = PPO.load(path)
+    if algo == 'sac':
+        model = SAC.load(path)
+    if algo == 'td3':
+        model = TD3.load(path)
+    if algo == 'ddpg':
+        model = DDPG.load(path)
+
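+    # The splits above and below assume the folder name written by singleagent.py,
+    # i.e. 'save-<env>-<algo>-<obs>-<act>-<timestamp>'; for example (hypothetical run):
+    #   exp = './results/save-hover-ppo-kin-one_d_rpm-01.01.2023_12.00.00'
+    #   exp.split("-")[2] -> 'ppo', exp.split("-")[4] -> 'one_d_rpm'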
+    #### Parameters to recreate the environment ################
+    env_name = exp.split("-")[1]+"-aviary-v0"
+    OBS = ObservationType.KIN if exp.split("-")[3] == 'kin' else ObservationType.RGB
+
+    # Parse ActionType instance from file name
+    action_name = exp.split("-")[4]
+    ACT = [action for action in ActionType if action.value == action_name]
+    if len(ACT) != 1:
+        raise AssertionError("Result file could have gotten corrupted. Extracted action type does not match any of the existing ones.")
+    ACT = ACT.pop()
+
+    #### Evaluate the model ####################################
+    eval_env = gym.make(env_name,
+                        aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
+                        obs=OBS,
+                        act=ACT
+                        )
+    mean_reward, std_reward = evaluate_policy(model,
+                                              eval_env,
+                                              n_eval_episodes=10
+                                              )
+    print("\n\n\nMean reward ", mean_reward, " +- ", std_reward, "\n\n")
+
+    #### Show, record a video, and log the model's performance #
+    test_env = gym.make(env_name,
+                        gui=gui,
+                        record=False,
+                        aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
+                        obs=OBS,
+                        act=ACT
+                        )
+    logger = Logger(logging_freq_hz=int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS),
+                    num_drones=1,
+                    output_folder=output_folder
+                    )
+    obs = test_env.reset()
+    start = time.time()
+    for i in range(6*int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS)): # Up to 6''
+        action, _states = model.predict(obs,
+                                        deterministic=True # OPTIONAL 'deterministic=False'
+                                        )
+        obs, reward, done, info = test_env.step(action)
+        test_env.render()
+        if OBS==ObservationType.KIN:
+            logger.log(drone=0,
+                       timestamp=i/test_env.SIM_FREQ,
+                       state=np.hstack([obs[0:3], np.zeros(4), obs[3:15], np.resize(action, (4))]),
+                       control=np.zeros(12)
+                       )
+        sync(np.floor(i*test_env.AGGR_PHY_STEPS), start, test_env.TIMESTEP)
+        # if done: obs = test_env.reset() # OPTIONAL EPISODE HALT
+    test_env.close()
+    logger.save_as_csv("sa") # Optional CSV save
+    if plot:
+        logger.plot()
+
+    # with np.load(exp+'/evaluations.npz') as data:
+    #     print(data.files)
+    #     print(data['timesteps'])
+    #     print(data['results'])
+    #     print(data['ep_lengths'])
+
+if __name__ == "__main__":
+    #### Define and parse (optional) arguments for the script ##
+    parser = argparse.ArgumentParser(description='Single agent reinforcement learning test script')
+    parser.add_argument('--exp', type=str, help='The experiment folder written as ./results/save-<env>-<algo>-<obs>-<act>-<time_date>', metavar='')
+    parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='')
+    parser.add_argument('--plot', default=DEFAULT_PLOT, type=str2bool, help='Whether to plot the simulation results (default: True)', metavar='')
+    parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
+    ARGS = parser.parse_args()
+
+    run(**vars(ARGS))
\ No newline at end of file
diff --git a/gym_pybullet_drones/examples/velocity.py b/gym_pybullet_drones/examples/pid-velocity.py
similarity index 100%
rename from gym_pybullet_drones/examples/velocity.py
rename to gym_pybullet_drones/examples/pid-velocity.py
diff --git a/gym_pybullet_drones/examples/fly.py b/gym_pybullet_drones/examples/pid.py
similarity index 100%
rename from gym_pybullet_drones/examples/fly.py
rename to gym_pybullet_drones/examples/pid.py
diff --git a/pyproject.toml b/pyproject.toml
index d2a032ae5..7b1b1147a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gym-pybullet-drones"
-version = "0.0.3"
+version = "0.2.0"
 description = "PyBullet Gym environments for single and multi-agent reinforcement learning of quadcopter control"
 authors = ["Jacopo Panerati "]
 license = "MIT"
@@ -18,10 +18,10 @@ matplotlib = "^3.5"
 cycler = "^0.10"
 gym = "^0.21"
 pybullet = "^3.2"
-torch = "1.11.0"
-"ray[rllib]" = "1.9"
 stable-baselines3 = "1.5.0"
 scipy = "^1.8"
+torch = "1.11.0"
+"ray[rllib]" = "1.9"
 tensorboard = "^2.9"
 
 [tool.poetry.dev-dependencies]