From ca1feec8699e0358f7c2cc81545eab27b3b899ed Mon Sep 17 00:00:00 2001 From: Jacopo Panerati Date: Sat, 15 Apr 2023 19:52:37 +0400 Subject: [PATCH] further cleanup --- experiments/learning/ma_script.bash | 0 experiments/learning/ma_script.slrm | 0 experiments/learning/multiagent.py | 309 ---------------- experiments/learning/results/foo.txt | 0 experiments/learning/test_multiagent.py | 332 ------------------ experiments/{learning => }/sa_script.bash | 0 experiments/{learning => }/sa_script.slrm | 0 .../{learning => }/shared_constants.py | 0 experiments/{learning => }/singleagent.py | 0 .../{learning => }/test_singleagent.py | 0 gym_pybullet_drones/envs/DynAviary.py | 219 ------------ gym_pybullet_drones/envs/RLAviary.py | 2 + gym_pybullet_drones/examples/compare.py | 140 -------- gym_pybullet_drones/examples/groundeffect.py | 167 --------- .../examples/{ => learning}/learn.py | 0 .../examples/learning/sa_script.bash | 23 ++ .../examples/learning/sa_script.slrm | 53 +++ .../examples/learning/shared_constants.py | 2 + .../examples/learning/singleagent.py | 283 +++++++++++++++ .../examples/learning/test_singleagent.py | 141 ++++++++ .../examples/{velocity.py => pid-velocity.py} | 0 .../examples/{fly.py => pid.py} | 0 pyproject.toml | 6 +- 23 files changed, 507 insertions(+), 1170 deletions(-) delete mode 100644 experiments/learning/ma_script.bash delete mode 100644 experiments/learning/ma_script.slrm delete mode 100644 experiments/learning/multiagent.py delete mode 100644 experiments/learning/results/foo.txt delete mode 100644 experiments/learning/test_multiagent.py rename experiments/{learning => }/sa_script.bash (100%) rename experiments/{learning => }/sa_script.slrm (100%) rename experiments/{learning => }/shared_constants.py (100%) rename experiments/{learning => }/singleagent.py (100%) rename experiments/{learning => }/test_singleagent.py (100%) delete mode 100644 gym_pybullet_drones/envs/DynAviary.py create mode 100644 gym_pybullet_drones/envs/RLAviary.py delete mode 100644 gym_pybullet_drones/examples/compare.py delete mode 100644 gym_pybullet_drones/examples/groundeffect.py rename gym_pybullet_drones/examples/{ => learning}/learn.py (100%) create mode 100755 gym_pybullet_drones/examples/learning/sa_script.bash create mode 100644 gym_pybullet_drones/examples/learning/sa_script.slrm create mode 100644 gym_pybullet_drones/examples/learning/shared_constants.py create mode 100644 gym_pybullet_drones/examples/learning/singleagent.py create mode 100644 gym_pybullet_drones/examples/learning/test_singleagent.py rename gym_pybullet_drones/examples/{velocity.py => pid-velocity.py} (100%) rename gym_pybullet_drones/examples/{fly.py => pid.py} (100%) diff --git a/experiments/learning/ma_script.bash b/experiments/learning/ma_script.bash deleted file mode 100644 index e69de29bb..000000000 diff --git a/experiments/learning/ma_script.slrm b/experiments/learning/ma_script.slrm deleted file mode 100644 index e69de29bb..000000000 diff --git a/experiments/learning/multiagent.py b/experiments/learning/multiagent.py deleted file mode 100644 index 2ae74f620..000000000 --- a/experiments/learning/multiagent.py +++ /dev/null @@ -1,309 +0,0 @@ -"""Learning script for multi-agent problems. - -Agents are based on `ray[rllib]`'s implementation of PPO and use a custom centralized critic. 
- -Example -------- -To run the script, type in a terminal: - - $ python multiagent.py --num_drones --env --obs --act --algo --num_workers - -Notes ------ -Check Ray's status at: - - http://127.0.0.1:8265 - -""" -import os -import time -import argparse -from datetime import datetime -from sys import platform -import subprocess -import pdb -import math -import numpy as np -import pybullet as p -import pickle -import matplotlib.pyplot as plt -import gym -from gym import error, spaces, utils -from gym.utils import seeding -from gym.spaces import Box, Dict -import torch -import torch.nn as nn -from ray.rllib.models.torch.fcnet import FullyConnectedNetwork -import ray -from ray import tune -from ray.tune.logger import DEFAULT_LOGGERS -from ray.tune import register_env -from ray.rllib.agents import ppo -from ray.rllib.agents.ppo import PPOTrainer, PPOTFPolicy -from ray.rllib.examples.policy.random_policy import RandomPolicy -from ray.rllib.utils.test_utils import check_learning_achieved -from ray.rllib.agents.callbacks import DefaultCallbacks -from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 -from ray.rllib.models import ModelCatalog -from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.env.multi_agent_env import ENV_STATE - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary -from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary -from gym_pybullet_drones.envs.multi_agent_rl.MeetupAviary import MeetupAviary -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.utils.Logger import Logger - -import shared_constants - -OWN_OBS_VEC_SIZE = None # Modified at runtime -ACTION_VEC_SIZE = None # Modified at runtime - -#### Useful links ########################################## -# Workflow: github.com/ray-project/ray/blob/master/doc/source/rllib-training.rst -# ENV_STATE example: github.com/ray-project/ray/blob/master/rllib/examples/env/two_step_game.py -# Competing policies example: github.com/ray-project/ray/blob/master/rllib/examples/rock_paper_scissors_multiagent.py - -############################################################ -class CustomTorchCentralizedCriticModel(TorchModelV2, nn.Module): - """Multi-agent model that implements a centralized value function. - - It assumes the observation is a dict with 'own_obs' and 'opponent_obs', the - former of which can be used for computing actions (i.e., decentralized - execution), and the latter for optimization (i.e., centralized learning). - - This model has two parts: - - An action model that looks at just 'own_obs' to compute actions - - A value model that also looks at the 'opponent_obs' / 'opponent_action' - to compute the value (it does this by using the 'obs_flat' tensor). 
- """ - - def __init__(self, obs_space, action_space, num_outputs, model_config, name): - TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name) - nn.Module.__init__(self) - self.action_model = FullyConnectedNetwork( - Box(low=-1, high=1, shape=(OWN_OBS_VEC_SIZE, )), - action_space, - num_outputs, - model_config, - name + "_action" - ) - self.value_model = FullyConnectedNetwork( - obs_space, - action_space, - 1, - model_config, - name + "_vf" - ) - self._model_in = None - - def forward(self, input_dict, state, seq_lens): - self._model_in = [input_dict["obs_flat"], state, seq_lens] - return self.action_model({"obs": input_dict["obs"]["own_obs"]}, state, seq_lens) - - def value_function(self): - value_out, _ = self.value_model({"obs": self._model_in[0]}, self._model_in[1], self._model_in[2]) - return torch.reshape(value_out, [-1]) - -############################################################ -class FillInActions(DefaultCallbacks): - def on_postprocess_trajectory(self, worker, episode, agent_id, policy_id, policies, postprocessed_batch, original_batches, **kwargs): - to_update = postprocessed_batch[SampleBatch.CUR_OBS] - other_id = 1 if agent_id == 0 else 0 - action_encoder = ModelCatalog.get_preprocessor_for_space( - # Box(-np.inf, np.inf, (ACTION_VEC_SIZE,), np.float32) # Unbounded - Box(-1, 1, (ACTION_VEC_SIZE,), np.float32) # Bounded - ) - _, opponent_batch = original_batches[other_id] - # opponent_actions = np.array([action_encoder.transform(a) for a in opponent_batch[SampleBatch.ACTIONS]]) # Unbounded - opponent_actions = np.array([action_encoder.transform(np.clip(a, -1, 1)) for a in opponent_batch[SampleBatch.ACTIONS]]) # Bounded - to_update[:, -ACTION_VEC_SIZE:] = opponent_actions - -############################################################ -def central_critic_observer(agent_obs, **kw): - new_obs = { - 0: { - "own_obs": agent_obs[0], - "opponent_obs": agent_obs[1], - "opponent_action": np.zeros(ACTION_VEC_SIZE), # Filled in by FillInActions - }, - 1: { - "own_obs": agent_obs[1], - "opponent_obs": agent_obs[0], - "opponent_action": np.zeros(ACTION_VEC_SIZE), # Filled in by FillInActions - }, - } - return new_obs - -############################################################ -if __name__ == "__main__": - - #### Define and parse (optional) arguments for the script ## - parser = argparse.ArgumentParser(description='Multi-agent reinforcement learning experiments script') - parser.add_argument('--num_drones', default=2, type=int, help='Number of drones (default: 2)', metavar='') - parser.add_argument('--env', default='leaderfollower', type=str, choices=['leaderfollower', 'flock', 'meetup'], help='Task (default: leaderfollower)', metavar='') - parser.add_argument('--obs', default='kin', type=ObservationType, help='Observation space (default: kin)', metavar='') - parser.add_argument('--act', default='one_d_rpm', type=ActionType, help='Action space (default: one_d_rpm)', metavar='') - parser.add_argument('--algo', default='cc', type=str, choices=['cc'], help='MARL approach (default: cc)', metavar='') - parser.add_argument('--workers', default=0, type=int, help='Number of RLlib workers (default: 0)', metavar='') - ARGS = parser.parse_args() - - #### Save directory ######################################## - filename = os.path.dirname(os.path.abspath(__file__))+'/results/save-'+ARGS.env+'-'+str(ARGS.num_drones)+'-'+ARGS.algo+'-'+ARGS.obs.value+'-'+ARGS.act.value+'-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S") - if not os.path.exists(filename): - 
os.makedirs(filename+'/') - - #### Print out current git commit hash ##################### - if platform == "linux" or platform == "darwin": - git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip() - with open(filename+'/git_commit.txt', 'w+') as f: - f.write(str(git_commit)) - - #### Constants, and errors ################################# - if ARGS.obs==ObservationType.KIN: - OWN_OBS_VEC_SIZE = 12 - elif ARGS.obs==ObservationType.RGB: - print("[ERROR] ObservationType.RGB for multi-agent systems not yet implemented") - exit() - else: - print("[ERROR] unknown ObservationType") - exit() - if ARGS.act in [ActionType.ONE_D_RPM, ActionType.ONE_D_DYN, ActionType.ONE_D_PID]: - ACTION_VEC_SIZE = 1 - elif ARGS.act in [ActionType.RPM, ActionType.DYN, ActionType.VEL]: - ACTION_VEC_SIZE = 4 - elif ARGS.act == ActionType.PID: - ACTION_VEC_SIZE = 3 - else: - print("[ERROR] unknown ActionType") - exit() - - #### Uncomment to debug slurm scripts ###################### - # exit() - - #### Initialize Ray Tune ################################### - ray.shutdown() - ray.init(ignore_reinit_error=True) - - #### Register the custom centralized critic model ########## - ModelCatalog.register_custom_model("cc_model", CustomTorchCentralizedCriticModel) - - #### Register the environment ############################## - temp_env_name = "this-aviary-v0" - if ARGS.env == 'flock': - register_env(temp_env_name, lambda _: FlockAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - ) - elif ARGS.env == 'leaderfollower': - register_env(temp_env_name, lambda _: LeaderFollowerAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - ) - elif ARGS.env == 'meetup': - register_env(temp_env_name, lambda _: MeetupAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - ) - else: - print("[ERROR] environment not yet implemented") - exit() - - #### Unused env to extract the act and obs spaces ########## - if ARGS.env == 'flock': - temp_env = FlockAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - elif ARGS.env == 'leaderfollower': - temp_env = LeaderFollowerAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - elif ARGS.env == 'meetup': - temp_env = MeetupAviary(num_drones=ARGS.num_drones, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=ARGS.obs, - act=ARGS.act - ) - else: - print("[ERROR] environment not yet implemented") - exit() - observer_space = Dict({ - "own_obs": temp_env.observation_space[0], - "opponent_obs": temp_env.observation_space[0], - "opponent_action": temp_env.action_space[0], - }) - action_space = temp_env.action_space[0] - - #### Note ################################################## - # RLlib will create ``num_workers + 1`` copies of the - # environment since one copy is needed for the driver process. 
- # To avoid paying the extra overhead of the driver copy, - # which is needed to access the env's action and observation spaces, - # you can defer environment initialization until ``reset()`` is called - - #### Set up the trainer's config ########################### - config = ppo.DEFAULT_CONFIG.copy() # For the default config, see github.com/ray-project/ray/blob/master/rllib/agents/trainer.py - config = { - "env": temp_env_name, - "num_workers": 0 + ARGS.workers, - "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0 - "batch_mode": "complete_episodes", - "callbacks": FillInActions, - "framework": "torch", - } - - #### Set up the model parameters of the trainer's config ### - config["model"] = { - "custom_model": "cc_model", - } - - #### Set up the multiagent params of the trainer's config ## - config["multiagent"] = { - "policies": { - "pol0": (None, observer_space, action_space, {"agent_id": 0,}), - "pol1": (None, observer_space, action_space, {"agent_id": 1,}), - }, - "policy_mapping_fn": lambda x: "pol0" if x == 0 else "pol1", # # Function mapping agent ids to policy ids - "observation_fn": central_critic_observer, # See rllib/evaluation/observation_function.py for more info - } - - #### Ray Tune stopping conditions ########################## - stop = { - "timesteps_total": 120000, # 100000 ~= 10' - # "episode_reward_mean": 0, - # "training_iteration": 0, - } - - #### Train ################################################# - results = tune.run( - "PPO", - stop=stop, - config=config, - verbose=True, - checkpoint_at_end=True, - local_dir=filename, - ) - # check_learning_achieved(results, 1.0) - - #### Save agent ############################################ - checkpoints = results.get_trial_checkpoints_paths(trial=results.get_best_trial('episode_reward_mean', - mode='max' - ), - metric='episode_reward_mean' - ) - with open(filename+'/checkpoint.txt', 'w+') as f: - f.write(checkpoints[0][0]) - - #### Shut down Ray ######################################### - ray.shutdown() diff --git a/experiments/learning/results/foo.txt b/experiments/learning/results/foo.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/experiments/learning/test_multiagent.py b/experiments/learning/test_multiagent.py deleted file mode 100644 index 3176c8a5f..000000000 --- a/experiments/learning/test_multiagent.py +++ /dev/null @@ -1,332 +0,0 @@ -"""Test script for multiagent problems. 
- -This scripts runs the best model found by one of the executions of `multiagent.py` - -Example -------- -To run the script, type in a terminal: - - $ python test_multiagent.py --exp ./results/save------ - -""" -import os -import time -import argparse -from datetime import datetime -import pdb -import math -import numpy as np -import pybullet as p -import pickle -import matplotlib.pyplot as plt -import gym -from gym import error, spaces, utils -from gym.utils import seeding -from gym.spaces import Box, Dict -import torch -import torch.nn as nn -import ray -from ray import tune -from ray.tune import register_env -from ray.rllib.agents import ppo -from ray.rllib.agents.ppo import PPOTrainer, PPOTFPolicy -from ray.rllib.examples.policy.random_policy import RandomPolicy -from ray.rllib.utils.test_utils import check_learning_achieved -from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 -from ray.rllib.agents.callbacks import DefaultCallbacks -from ray.rllib.models.torch.fcnet import FullyConnectedNetwork -from ray.rllib.models import ModelCatalog -from ray.rllib.policy.sample_batch import SampleBatch - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary -from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary -from gym_pybullet_drones.envs.multi_agent_rl.MeetupAviary import MeetupAviary -from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType -from gym_pybullet_drones.utils.Logger import Logger -from gym_pybullet_drones.utils.utils import sync - -import shared_constants - -OWN_OBS_VEC_SIZE = None # Modified at runtime -ACTION_VEC_SIZE = None # Modified at runtime - -############################################################ -class CustomTorchCentralizedCriticModel(TorchModelV2, nn.Module): - """Multi-agent model that implements a centralized value function. - - It assumes the observation is a dict with 'own_obs' and 'opponent_obs', the - former of which can be used for computing actions (i.e., decentralized - execution), and the latter for optimization (i.e., centralized learning). - - This model has two parts: - - An action model that looks at just 'own_obs' to compute actions - - A value model that also looks at the 'opponent_obs' / 'opponent_action' - to compute the value (it does this by using the 'obs_flat' tensor). 
- """ - - def __init__(self, obs_space, action_space, num_outputs, model_config, name): - TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name) - nn.Module.__init__(self) - self.action_model = FullyConnectedNetwork( - Box(low=-1, high=1, shape=(OWN_OBS_VEC_SIZE, )), - action_space, - num_outputs, - model_config, - name + "_action" - ) - self.value_model = FullyConnectedNetwork( - obs_space, - action_space, - 1, - model_config, - name + "_vf" - ) - self._model_in = None - - def forward(self, input_dict, state, seq_lens): - self._model_in = [input_dict["obs_flat"], state, seq_lens] - return self.action_model({"obs": input_dict["obs"]["own_obs"]}, state, seq_lens) - - def value_function(self): - value_out, _ = self.value_model({"obs": self._model_in[0]}, self._model_in[1], self._model_in[2]) - return torch.reshape(value_out, [-1]) - -############################################################ -class FillInActions(DefaultCallbacks): - def on_postprocess_trajectory(self, worker, episode, agent_id, policy_id, policies, postprocessed_batch, original_batches, **kwargs): - to_update = postprocessed_batch[SampleBatch.CUR_OBS] - other_id = 1 if agent_id == 0 else 0 - action_encoder = ModelCatalog.get_preprocessor_for_space( - # Box(-np.inf, np.inf, (ACTION_VEC_SIZE,), np.float32) # Unbounded - Box(-1, 1, (ACTION_VEC_SIZE,), np.float32) # Bounded - ) - _, opponent_batch = original_batches[other_id] - # opponent_actions = np.array([action_encoder.transform(a) for a in opponent_batch[SampleBatch.ACTIONS]]) # Unbounded - opponent_actions = np.array([action_encoder.transform(np.clip(a, -1, 1)) for a in opponent_batch[SampleBatch.ACTIONS]]) # Bounded - to_update[:, -ACTION_VEC_SIZE:] = opponent_actions - -############################################################ -def central_critic_observer(agent_obs, **kw): - new_obs = { - 0: { - "own_obs": agent_obs[0], - "opponent_obs": agent_obs[1], - "opponent_action": np.zeros(ACTION_VEC_SIZE), # Filled in by FillInActions - }, - 1: { - "own_obs": agent_obs[1], - "opponent_obs": agent_obs[0], - "opponent_action": np.zeros(ACTION_VEC_SIZE), # Filled in by FillInActions - }, - } - return new_obs - -############################################################ -if __name__ == "__main__": - - #### Define and parse (optional) arguments for the script ## - parser = argparse.ArgumentParser(description='Multi-agent reinforcement learning experiments script') - parser.add_argument('--exp', type=str, help='The experiment folder written as ./results/save------', metavar='') - ARGS = parser.parse_args() - - #### Parameters to recreate the environment ################ - NUM_DRONES = int(ARGS.exp.split("-")[2]) - OBS = ObservationType.KIN if ARGS.exp.split("-")[4] == 'kin' else ObservationType.RGB - - # Parse ActionType instance from file name - action_name = ARGS.exp.split("-")[5] - ACT = [action for action in ActionType if action.value == action_name] - if len(ACT) != 1: - raise AssertionError("Result file could have gotten corrupted. 
Extracted action type does not match any of the existing ones.") - ACT = ACT.pop() - - #### Constants, and errors ################################# - if OBS == ObservationType.KIN: - OWN_OBS_VEC_SIZE = 12 - elif OBS == ObservationType.RGB: - print("[ERROR] ObservationType.RGB for multi-agent systems not yet implemented") - exit() - else: - print("[ERROR] unknown ObservationType") - exit() - if ACT in [ActionType.ONE_D_RPM, ActionType.ONE_D_DYN, ActionType.ONE_D_PID]: - ACTION_VEC_SIZE = 1 - elif ACT in [ActionType.RPM, ActionType.DYN, ActionType.VEL]: - ACTION_VEC_SIZE = 4 - elif ACT == ActionType.PID: - ACTION_VEC_SIZE = 3 - else: - print("[ERROR] unknown ActionType") - exit() - - #### Initialize Ray Tune ################################### - ray.shutdown() - ray.init(ignore_reinit_error=True) - - #### Register the custom centralized critic model ########## - ModelCatalog.register_custom_model("cc_model", CustomTorchCentralizedCriticModel) - - #### Register the environment ############################## - temp_env_name = "this-aviary-v0" - if ARGS.exp.split("-")[1] == 'flock': - register_env(temp_env_name, lambda _: FlockAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - ) - elif ARGS.exp.split("-")[1] == 'leaderfollower': - register_env(temp_env_name, lambda _: LeaderFollowerAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - ) - elif ARGS.exp.split("-")[1] == 'meetup': - register_env(temp_env_name, lambda _: MeetupAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - ) - else: - print("[ERROR] environment not yet implemented") - exit() - - #### Unused env to extract the act and obs spaces ########## - if ARGS.exp.split("-")[1] == 'flock': - temp_env = FlockAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - elif ARGS.exp.split("-")[1] == 'leaderfollower': - temp_env = LeaderFollowerAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - elif ARGS.exp.split("-")[1] == 'meetup': - temp_env = MeetupAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT - ) - else: - print("[ERROR] environment not yet implemented") - exit() - observer_space = Dict({ - "own_obs": temp_env.observation_space[0], - "opponent_obs": temp_env.observation_space[0], - "opponent_action": temp_env.action_space[0], - }) - action_space = temp_env.action_space[0] - - #### Set up the trainer's config ########################### - config = ppo.DEFAULT_CONFIG.copy() # For the default config, see github.com/ray-project/ray/blob/master/rllib/agents/trainer.py - config = { - "env": temp_env_name, - "num_workers": 0, #0+ARGS.workers, - "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0 - "batch_mode": "complete_episodes", - "callbacks": FillInActions, - "framework": "torch", - } - - #### Set up the model parameters of the trainer's config ### - config["model"] = { - "custom_model": "cc_model", - } - - #### Set up the multiagent params of the trainer's config ## - config["multiagent"] = { - "policies": { - "pol0": (None, observer_space, action_space, {"agent_id": 0,}), - "pol1": (None, observer_space, action_space, {"agent_id": 1,}), - }, - "policy_mapping_fn": lambda x: "pol0" if x == 0 else "pol1", # # Function mapping agent ids to 
policy ids - "observation_fn": central_critic_observer, # See rllib/evaluation/observation_function.py for more info - } - - #### Restore agent ######################################### - agent = ppo.PPOTrainer(config=config) - with open(ARGS.exp+'/checkpoint.txt', 'r+') as f: - checkpoint = f.read() - agent.restore(checkpoint) - - #### Extract and print policies ############################ - policy0 = agent.get_policy("pol0") - print("action model 0", policy0.model.action_model) - print("value model 0", policy0.model.value_model) - policy1 = agent.get_policy("pol1") - print("action model 1", policy1.model.action_model) - print("value model 1", policy1.model.value_model) - - #### Create test environment ############################### - if ARGS.exp.split("-")[1] == 'flock': - test_env = FlockAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT, - gui=True, - record=False - ) - elif ARGS.exp.split("-")[1] == 'leaderfollower': - test_env = LeaderFollowerAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT, - gui=True, - record=False - ) - elif ARGS.exp.split("-")[1] == 'meetup': - test_env = MeetupAviary(num_drones=NUM_DRONES, - aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, - obs=OBS, - act=ACT, - gui=True, - record=False - ) - else: - print("[ERROR] environment not yet implemented") - exit() - - #### Show, record a video, and log the model's performance # - obs = test_env.reset() - logger = Logger(logging_freq_hz=int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS), - num_drones=NUM_DRONES - ) - if ACT in [ActionType.ONE_D_RPM, ActionType.ONE_D_DYN, ActionType.ONE_D_PID]: - action = {i: np.array([0]) for i in range(NUM_DRONES)} - elif ACT in [ActionType.RPM, ActionType.DYN, ActionType.VEL]: - action = {i: np.array([0, 0, 0, 0]) for i in range(NUM_DRONES)} - elif ACT==ActionType.PID: - action = {i: np.array([0, 0, 0]) for i in range(NUM_DRONES)} - else: - print("[ERROR] unknown ActionType") - exit() - start = time.time() - for i in range(6*int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS)): # Up to 6'' - #### Deploy the policies ################################### - temp = {} - temp[0] = policy0.compute_single_action(np.hstack([action[1], obs[1], obs[0]])) # Counterintuitive order, check params.json - temp[1] = policy1.compute_single_action(np.hstack([action[0], obs[0], obs[1]])) - action = {0: temp[0][0], 1: temp[1][0]} - obs, reward, done, info = test_env.step(action) - test_env.render() - if OBS==ObservationType.KIN: - for j in range(NUM_DRONES): - logger.log(drone=j, - timestamp=i/test_env.SIM_FREQ, - state= np.hstack([obs[j][0:3], np.zeros(4), obs[j][3:15], np.resize(action[j], (4))]), - control=np.zeros(12) - ) - sync(np.floor(i*test_env.AGGR_PHY_STEPS), start, test_env.TIMESTEP) - # if done["__all__"]: obs = test_env.reset() # OPTIONAL EPISODE HALT - test_env.close() - logger.save_as_csv("ma") # Optional CSV save - logger.plot() - - #### Shut down Ray ######################################### - ray.shutdown() diff --git a/experiments/learning/sa_script.bash b/experiments/sa_script.bash similarity index 100% rename from experiments/learning/sa_script.bash rename to experiments/sa_script.bash diff --git a/experiments/learning/sa_script.slrm b/experiments/sa_script.slrm similarity index 100% rename from experiments/learning/sa_script.slrm rename to experiments/sa_script.slrm diff --git a/experiments/learning/shared_constants.py b/experiments/shared_constants.py similarity index 100% 
rename from experiments/learning/shared_constants.py rename to experiments/shared_constants.py diff --git a/experiments/learning/singleagent.py b/experiments/singleagent.py similarity index 100% rename from experiments/learning/singleagent.py rename to experiments/singleagent.py diff --git a/experiments/learning/test_singleagent.py b/experiments/test_singleagent.py similarity index 100% rename from experiments/learning/test_singleagent.py rename to experiments/test_singleagent.py diff --git a/gym_pybullet_drones/envs/DynAviary.py b/gym_pybullet_drones/envs/DynAviary.py deleted file mode 100644 index 744aa9c8e..000000000 --- a/gym_pybullet_drones/envs/DynAviary.py +++ /dev/null @@ -1,219 +0,0 @@ -import numpy as np -from gym import spaces - -from gym_pybullet_drones.envs.BaseAviary import BaseAviary -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.utils.utils import nnlsRPM - -class DynAviary(BaseAviary): - """Multi-drone environment class for control with desired thrust and torques.""" - - ################################################################################ - - def __init__(self, - drone_model: DroneModel=DroneModel.CF2X, - num_drones: int=1, - neighbourhood_radius: float=np.inf, - initial_xyzs=None, - initial_rpys=None, - physics: Physics=Physics.PYB, - freq: int=240, - aggregate_phy_steps: int=1, - gui=False, - record=False, - obstacles=False, - user_debug_gui=True, - output_folder='results' - ): - """Initialization of an aviary controlled by desired thrust and torques. - - Attribute `dynamics_attributes` is automatically set to True when calling - the superclass `__init__()` method. - - Parameters - ---------- - drone_model : DroneModel, optional - The desired drone type (detailed in an .urdf file in folder `assets`). - num_drones : int, optional - The desired number of drones in the aviary. - neighbourhood_radius : float, optional - Radius used to compute the drones' adjacency matrix, in meters. - initial_xyzs: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones. - initial_rpys: ndarray | None, optional - (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians). - physics : Physics, optional - The desired implementation of PyBullet physics/custom dynamics. - freq : int, optional - The frequency (Hz) at which the physics engine steps. - aggregate_phy_steps : int, optional - The number of physics steps within one call to `BaseAviary.step()`. - gui : bool, optional - Whether to use PyBullet's GUI. - record : bool, optional - Whether to save a video of the simulation in folder `files/videos/`. - obstacles : bool, optional - Whether to add obstacles to the simulation. - user_debug_gui : bool, optional - Whether to draw the drones' axes and the GUI RPMs sliders. - - """ - super().__init__(drone_model=drone_model, - num_drones=num_drones, - neighbourhood_radius=neighbourhood_radius, - initial_xyzs=initial_xyzs, - initial_rpys=initial_rpys, - physics=physics, - freq=freq, - aggregate_phy_steps=aggregate_phy_steps, - gui=gui, - record=record, - obstacles=obstacles, - user_debug_gui=user_debug_gui, - dynamics_attributes=True, - output_folder=output_folder - ) - - - ################################################################################ - - def _actionSpace(self): - """Returns the action space of the environment. - - Returns - ------- - dict[str, ndarray] - A Dict of Box(4,) with NUM_DRONES entries, - indexed by drone Id in string format. 
- - """ - #### Action vector ######## Thrust X Torque Y Torque Z Torque - act_lower_bound = np.array([0., -self.MAX_XY_TORQUE, -self.MAX_XY_TORQUE, -self.MAX_Z_TORQUE]) - act_upper_bound = np.array([self.MAX_THRUST, self.MAX_XY_TORQUE, self.MAX_XY_TORQUE, self.MAX_Z_TORQUE]) - return spaces.Dict({str(i): spaces.Box(low=act_lower_bound, - high=act_upper_bound, - dtype=np.float32 - ) for i in range(self.NUM_DRONES)}) - - ################################################################################ - - def _observationSpace(self): - """Returns the observation space of the environment. - - Returns - ------- - dict[str, dict[str, ndarray]] - A Dict with NUM_DRONES entries indexed by Id in string format, - each a Dict in the form {Box(20,), MultiBinary(NUM_DRONES)}. - - """ - #### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3 - obs_lower_bound = np.array([-np.inf, -np.inf, 0., -1., -1., -1., -1., -np.pi, -np.pi, -np.pi, -np.inf, -np.inf, -np.inf, -np.inf, -np.inf, -np.inf, 0., 0., 0., 0.]) - obs_upper_bound = np.array([np.inf, np.inf, np.inf, 1., 1., 1., 1., np.pi, np.pi, np.pi, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, self.MAX_RPM, self.MAX_RPM, self.MAX_RPM, self.MAX_RPM]) - return spaces.Dict({str(i): spaces.Dict({"state": spaces.Box(low=obs_lower_bound, - high=obs_upper_bound, - dtype=np.float32 - ), - "neighbors": spaces.MultiBinary(self.NUM_DRONES) - }) for i in range(self.NUM_DRONES)}) - - ################################################################################ - - def _computeObs(self): - """Returns the current observation of the environment. - - For the value of key "state", see the implementation of `_getDroneStateVector()`, - the value of key "neighbors" is the drone's own row of the adjacency matrix. - - Returns - ------- - dict[str, dict[str, ndarray]] - A Dict with NUM_DRONES entries indexed by Id in string format, - each a Dict in the form {Box(20,), MultiBinary(NUM_DRONES)}. - - """ - adjacency_mat = self._getAdjacencyMatrix() - return {str(i): {"state": self._getDroneStateVector(i), "neighbors": adjacency_mat[i,:]} for i in range(self.NUM_DRONES) } - - ################################################################################ - - def _preprocessAction(self, - action - ): - """Pre-processes the action passed to `.step()` into motors' RPMs. - - Solves desired thrust and torques using NNLS and converts a dictionary into a 2D array. - - Parameters - ---------- - action : dict[str, ndarray] - The input action each drone (desired thrust and torques), to be translated into RPMs. - - Returns - ------- - ndarray - (NUM_DRONES, 4)-shaped array of ints containing to clipped RPMs - commanded to the 4 motors of each drone. - - """ - clipped_action = np.zeros((self.NUM_DRONES, 4)) - for k, v in action.items(): - clipped_action[int(k), :] = nnlsRPM(thrust=v[0], - x_torque=v[1], - y_torque=v[2], - z_torque=v[3], - counter=self.step_counter, - max_thrust=self.MAX_THRUST, - max_xy_torque=self.MAX_XY_TORQUE, - max_z_torque=self.MAX_Z_TORQUE, - a=self.A, - inv_a=self.INV_A, - b_coeff=self.B_COEFF, - gui=self.GUI - ) - return clipped_action - - ################################################################################ - - def _computeReward(self): - """Computes the current reward value(s). - - Unused as this subclass is not meant for reinforcement learning. - - Returns - ------- - int - Dummy value. 
- - """ - return -1 - - ################################################################################ - - def _computeDone(self): - """Computes the current done value(s). - - Unused as this subclass is not meant for reinforcement learning. - - Returns - ------- - bool - Dummy value. - - """ - return False - - ################################################################################ - - def _computeInfo(self): - """Computes the current info dict(s). - - Unused as this subclass is not meant for reinforcement learning. - - Returns - ------- - dict[str, int] - Dummy value. - - """ - return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years diff --git a/gym_pybullet_drones/envs/RLAviary.py b/gym_pybullet_drones/envs/RLAviary.py new file mode 100644 index 000000000..bbf5bdb4b --- /dev/null +++ b/gym_pybullet_drones/envs/RLAviary.py @@ -0,0 +1,2 @@ +'''TBD +''' diff --git a/gym_pybullet_drones/examples/compare.py b/gym_pybullet_drones/examples/compare.py deleted file mode 100644 index 96c0e5be9..000000000 --- a/gym_pybullet_drones/examples/compare.py +++ /dev/null @@ -1,140 +0,0 @@ -"""Script comparing a gym_pybullet_drones simulation to a trace file. - -It is meant to compare/validate the behaviour of a pyhsics implementation. - -Example -------- -In a terminal, run as: - - $ python compare.py - -Notes ------ -The comparison is along a 2D trajectory in the X-Z plane, between x == +1 and -1. - -""" -import time -import argparse -import pickle -import numpy as np -import pkg_resources - -from gym_pybullet_drones.utils.utils import sync, str2bool -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary -from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl -from gym_pybullet_drones.utils.Logger import Logger - -DEFAULT_PHYICS = Physics('pyb') -DEFAULT_GUI = False -DEFAULT_RECORD_VIDEO = False -DEFAULT_TRACE_FILE = pkg_resources.resource_filename('gym_pybullet_drones', 'assets/example_trace.pkl') -DEFAULT_OUTPUT_FOLDER = 'results' -DEFAULT_COLAB = False - -def run( - physics=DEFAULT_PHYICS, - gui=DEFAULT_GUI, - record_video=DEFAULT_RECORD_VIDEO, - trace_file=DEFAULT_TRACE_FILE, - output_folder=DEFAULT_OUTPUT_FOLDER, - plot=True, - colab=DEFAULT_COLAB - ): - #### Load a trace and control reference from a .pkl file ### - with open(trace_file, 'rb') as in_file: - TRACE_TIMESTAMPS, TRACE_DATA, TRACE_CTRL_REFERENCE, _, _, _ = pickle.load(in_file) - - #### Compute trace's parameters ############################ - DURATION_SEC = int(TRACE_TIMESTAMPS[-1]) - SIMULATION_FREQ_HZ = int(len(TRACE_TIMESTAMPS)/TRACE_TIMESTAMPS[-1]) - - #### Initialize the simulation ############################# - env = CtrlAviary(drone_model=DroneModel.CF2X, - num_drones=1, - initial_xyzs=np.array([0, 0, .1]).reshape(1, 3), - physics=physics, - freq=SIMULATION_FREQ_HZ, - gui=gui, - record=record_video, - obstacles=False - ) - INITIAL_STATE = env.reset() - action = {"0": np.zeros(4)} - pos_err = 9999. 
- - #### TRACE_FILE starts at [0,0,0], sim starts at [0,0,INITIAL_STATE[2]] - TRACE_CTRL_REFERENCE[:, 2] = INITIAL_STATE["0"]["state"][2] - - #### Initialize the logger ################################# - logger = Logger(logging_freq_hz=SIMULATION_FREQ_HZ, - num_drones=2, - duration_sec=DURATION_SEC, - output_folder=output_folder, - colab=colab - ) - - #### Initialize the controller ############################# - ctrl = DSLPIDControl(drone_model=env.DRONE_MODEL) - - #### Run the comparison #################################### - START = time.time() - for i in range(DURATION_SEC*env.SIM_FREQ): - - #### Step the simulation ################################### - obs, reward, done, info = env.step(action) - - #### Compute next action using the set points from the trace - action["0"], pos_err, yaw_err = ctrl.computeControlFromState(control_timestep=env.TIMESTEP, - state=obs["0"]["state"], - target_pos=TRACE_CTRL_REFERENCE[i, 0:3], - target_vel=TRACE_CTRL_REFERENCE[i, 3:6] - ) - - #### Re-arrange the trace for consistency with the logger - trace_obs = np.hstack([TRACE_DATA[i, 0:3], np.zeros(4), TRACE_DATA[i, 6:9], TRACE_DATA[i, 3:6], TRACE_DATA[i, 9:12], TRACE_DATA[i, 12:16]]) - - #### Log the trace ######################################### - logger.log(drone=0, - timestamp=TRACE_TIMESTAMPS[i], - state=trace_obs, - control=np.hstack([TRACE_CTRL_REFERENCE[i, :], np.zeros(6)]) - ) - - #### Log the simulation #################################### - logger.log(drone=1, - timestamp=i/env.SIM_FREQ, - state=obs["0"]["state"], - control=np.hstack([TRACE_CTRL_REFERENCE[i, :], np.zeros(6)]) - ) - - #### Printout ############################################## - if i%env.SIM_FREQ == 0: - env.render() - - #### Sync the simulation ################################### - if gui: - sync(i, START, env.TIMESTEP) - - #### Close the environment ################################# - env.close() - - #### Save the simulation results ########################### - logger.save() - - #### Plot the simulation results ########################### - if plot: - logger.plot(pwm=True) - -if __name__ == "__main__": - #### Define and parse (optional) arguments for the script ## - parser = argparse.ArgumentParser(description='Trace comparison script using CtrlAviary and DSLPIDControl') - parser.add_argument('--physics', default=DEFAULT_PHYICS, type=Physics, help='Physics updates (default: PYB)', metavar='', choices=Physics) - parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: False)', metavar='') - parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: False)', metavar='') - parser.add_argument('--trace_file', default=DEFAULT_TRACE_FILE, type=str, help='Pickle file with the trace to compare to (default: "example_trace.pkl")', metavar='') - parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='') - parser.add_argument('--colab', default=DEFAULT_COLAB, type=bool, help='Whether example is being run by a notebook (default: "False")', metavar='') - ARGS = parser.parse_args() - - run(**vars(ARGS)) \ No newline at end of file diff --git a/gym_pybullet_drones/examples/groundeffect.py b/gym_pybullet_drones/examples/groundeffect.py deleted file mode 100644 index 52fe3f2f3..000000000 --- a/gym_pybullet_drones/examples/groundeffect.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Script demonstrating the ground effect contribution. 
- -The simulation is run by a `CtrlAviary` environment. - -Example -------- -In a terminal, run as: - - $ python groundeffect.py - -Notes ------ -The drone altitude tracks a sinusoid, near the ground plane. - -""" -import os -import time -import argparse -from datetime import datetime -import pdb -import math -import random -import numpy as np -import pybullet as p -import matplotlib.pyplot as plt - -from gym_pybullet_drones.utils.enums import DroneModel, Physics -from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary -from gym_pybullet_drones.envs.VisionAviary import VisionAviary -from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl -from gym_pybullet_drones.control.SimplePIDControl import SimplePIDControl -from gym_pybullet_drones.utils.Logger import Logger -from gym_pybullet_drones.utils.utils import sync, str2bool - -DEFAULT_GUI = True -DEFAULT_RECORD_VIDEO = False -DEFAULT_PLOT = True -DEFAULT_USER_DEBUG_GUI = False -DEFAULT_AGGREGATE = True -DEFAULT_OBSTACLES = True -DEFAULT_SIMULATION_FREQ_HZ = 240 -DEFAULT_CONTROL_FREQ_HZ = 240 -DEFAULT_DURATION_SEC = 4 -DEFAULT_OUTPUT_FOLDER = 'results' -DEFAULT_COLAB = False - -def run( - gui=DEFAULT_GUI, - record_video=DEFAULT_RECORD_VIDEO, - plot=DEFAULT_PLOT, - user_debug_gui=DEFAULT_USER_DEBUG_GUI, - aggregate=DEFAULT_AGGREGATE, - obstacles=DEFAULT_OBSTACLES, - simulation_freq_hz=DEFAULT_SIMULATION_FREQ_HZ, - control_freq_hz=DEFAULT_CONTROL_FREQ_HZ, - duration_sec=DEFAULT_DURATION_SEC, - output_folder=DEFAULT_OUTPUT_FOLDER, - colab=DEFAULT_COLAB - ): - - #### Initialize the simulation ############################# - INIT_XYZ = np.array([0, 0, 0.014]).reshape(1,3) - AGGR_PHY_STEPS = int(simulation_freq_hz/control_freq_hz) if aggregate else 1 - - #### Initialize a vertical trajectory ###################### - PERIOD = 4 - NUM_WP = control_freq_hz*PERIOD - TARGET_POS = np.zeros((NUM_WP,3)) - for i in range(NUM_WP): - TARGET_POS[i, :] = INIT_XYZ[0, 0], INIT_XYZ[0, 1], INIT_XYZ[0, 2] + 0.15 * (np.sin((i/NUM_WP)*(2*np.pi)) + 1) - wp_counter = 0 - - #### Create the environment ################################ - env = CtrlAviary(drone_model=DroneModel.CF2X, - num_drones=1, - initial_xyzs=INIT_XYZ, - physics=Physics.PYB_GND, - # physics=Physics.PYB, # For comparison - neighbourhood_radius=10, - freq=simulation_freq_hz, - aggregate_phy_steps=AGGR_PHY_STEPS, - gui=gui, - record=record_video, - obstacles=obstacles, - user_debug_gui=user_debug_gui - ) - - #### Obtain the PyBullet Client ID from the environment #### - PYB_CLIENT = env.getPyBulletClient() - - #### Initialize the logger ################################# - logger = Logger(logging_freq_hz=int(simulation_freq_hz/AGGR_PHY_STEPS), - num_drones=1, - output_folder=output_folder, - colab=colab - ) - - #### Initialize the controller ############################# - ctrl = DSLPIDControl(drone_model=DroneModel.CF2X) - - #### Run the simulation #################################### - CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ/control_freq_hz)) - action = {"0": np.array([0,0,0,0])} - START = time.time() - for i in range(0, int(duration_sec*env.SIM_FREQ), AGGR_PHY_STEPS): - - #### Make it rain rubber ducks ############################# - # if i/env.SIM_FREQ>5 and i%10==0 and i/env.SIM_FREQ<10: p.loadURDF("duck_vhacd.urdf", [0+random.gauss(0, 0.3),-0.5+random.gauss(0, 0.3),3], p.getQuaternionFromEuler([random.randint(0,360),random.randint(0,360),random.randint(0,360)]), physicsClientId=PYB_CLIENT) - - #### Step the simulation ################################### - obs, reward, done, info 
= env.step(action) - - #### Compute control at the desired frequency ############## - if i%CTRL_EVERY_N_STEPS == 0: - - #### Compute control for the current way point ############# - action["0"], _, _ = ctrl.computeControlFromState(control_timestep=CTRL_EVERY_N_STEPS*env.TIMESTEP, - state=obs["0"]["state"], - target_pos=TARGET_POS[wp_counter, :], - ) - - #### Go to the next way point and loop ##################### - wp_counter = wp_counter + 1 if wp_counter < (NUM_WP-1) else 0 - - #### Log the simulation #################################### - logger.log(drone=0, - timestamp=i/env.SIM_FREQ, - state= obs["0"]["state"], - control=np.hstack([TARGET_POS[wp_counter, :], np.zeros(9)]) - ) - - #### Printout ############################################## - if i%env.SIM_FREQ == 0: - env.render() - - #### Sync the simulation ################################### - if gui: - sync(i, START, env.TIMESTEP) - - #### Close the environment ################################# - env.close() - - #### Save the simulation results ########################### - logger.save() - logger.save_as_csv("gnd") # Optional CSV save - - #### Plot the simulation results ########################### - if plot: - logger.plot() - -if __name__ == "__main__": - #### Define and parse (optional) arguments for the script ## - parser = argparse.ArgumentParser(description='Ground effect script using CtrlAviary and DSLPIDControl') - parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='') - parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: False)', metavar='') - parser.add_argument('--plot', default=DEFAULT_PLOT, type=str2bool, help='Whether to plot the simulation results (default: True)', metavar='') - parser.add_argument('--user_debug_gui', default=DEFAULT_USER_DEBUG_GUI, type=str2bool, help='Whether to add debug lines and parameters to the GUI (default: False)', metavar='') - parser.add_argument('--aggregate', default=DEFAULT_AGGREGATE, type=str2bool, help='Whether to aggregate physics steps (default: False)', metavar='') - parser.add_argument('--obstacles', default=DEFAULT_OBSTACLES, type=str2bool, help='Whether to add obstacles to the environment (default: True)', metavar='') - parser.add_argument('--simulation_freq_hz', default=DEFAULT_SIMULATION_FREQ_HZ, type=int, help='Simulation frequency in Hz (default: 240)', metavar='') - parser.add_argument('--control_freq_hz', default=DEFAULT_CONTROL_FREQ_HZ, type=int, help='Control frequency in Hz (default: 48)', metavar='') - parser.add_argument('--duration_sec', default=DEFAULT_DURATION_SEC, type=int, help='Duration of the simulation in seconds (default: 5)', metavar='') - parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='') - parser.add_argument('--colab', default=DEFAULT_COLAB, type=bool, help='Whether example is being run by a notebook (default: "False")', metavar='') - ARGS = parser.parse_args() - - run(**vars(ARGS)) diff --git a/gym_pybullet_drones/examples/learn.py b/gym_pybullet_drones/examples/learning/learn.py similarity index 100% rename from gym_pybullet_drones/examples/learn.py rename to gym_pybullet_drones/examples/learning/learn.py diff --git a/gym_pybullet_drones/examples/learning/sa_script.bash b/gym_pybullet_drones/examples/learning/sa_script.bash new file mode 100755 index 000000000..1bfd99a82 --- /dev/null +++ 
b/gym_pybullet_drones/examples/learning/sa_script.bash @@ -0,0 +1,23 @@ +#!/bin/bash + +# declare -a env_list=( 'takeoff' 'hover' 'flythrugate' ) +# declare -a algo_list=( 'a2c' 'ppo' 'sac' 'td3' 'ddpg' ) +# declare -a obs_list=( 'kin' 'rgb' ) +# declare -a act_list=( 'rpm' 'dyn' 'pid' 'one_d_rpm' 'one_d_dyn' 'one_d_pid' ) + +# Note: 3*5*2*2=60 jobs; do not launch at once + +declare -a env_list=( 'hover' ) +declare -a algo_list=( 'ppo' 'sac' 'ddpg' ) +declare -a obs_list=( 'kin' 'rgb' ) +declare -a act_list=( 'one_d_rpm' ) + +for env in ${env_list[@]}; do + for algo in ${algo_list[@]}; do + for obs in ${obs_list[@]}; do + for act in ${act_list[@]}; do + sbatch --export=env=$env,algo=$algo,obs=$obs,act=$act sa_script.slrm + done + done + done +done diff --git a/gym_pybullet_drones/examples/learning/sa_script.slrm b/gym_pybullet_drones/examples/learning/sa_script.slrm new file mode 100644 index 000000000..1e9464ba0 --- /dev/null +++ b/gym_pybullet_drones/examples/learning/sa_script.slrm @@ -0,0 +1,53 @@ +#!/bin/bash + +# Use as sbatch --export=env="takeoff",algo="a2c",obs="kin",act="rpm" sa_script.slrm + +#SBATCH --job-name=pyb_drones +#SBATCH --output=/scratch/gobi1/jacopo/gym-pybullet-drones/experiments/learning/results/output-%N-%j.out # For output and error, use absolute paths that exists already +#SBATCH --error=/scratch/gobi1/jacopo/gym-pybullet-drones/experiments/learning/results/error-%N-%j.err # %N and %j will be replaced by the host name and job id, respectively +#SBATCH --open-mode=append +#SBATCH --partition=cpu # self-explanatory, set to your preference (e.g. gpu or cpu on MaRS, p100, t4, or cpu on Vaughan) +#SBATCH --cpus-per-task=16 # self-explanatory, set to your preference +#SBATCH --ntasks-per-node=1 +#SBATCH --mem=64G # self-explanatory, set to your preference +#SBATCH --gres=gpu:0 # NOTE: you need a GPU for CUDA support; self-explanatory, set to your preference +#SBATCH --nodes=1 +#SBATCH --qos=normal # for "high" and "deadline" QoS, refer to https://support.vectorinstitute.ai/AboutVaughan2 + +now=$(date +"%Y_%m_%d-%H_%M_%S") # current date + +log_file="/scratch/gobi1/jacopo/gym-pybullet-drones/experiments/learning/results/log-$env-$algo-$obs-$act-3D-$now.log" + +echo $SLURM_JOB_PARTITION >> $log_file # log the job partition + +echo $SLURM_JOB_ID >> $log_file # log the job id + +export LD_LIBRARY_PATH=/pkgs/cuda-10.1/lib64:/pkgs/cudnn-10.1-v7.6.3.30/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} # required for TensorFlow to see any GPU + +export PATH=/pkgs/anaconda3/bin:$PATH # exporting the binaries of your preferred version of conda + +source activate /scratch/gobi1/$USER/learning/ # NOTE: set the path to your conda environment path + +echo $CONDA_PREFIX >> $log_file # log the active conda environment + +python --version >> $log_file # log Python version + +# pip install --user --upgrade gym +# pip install --user --upgrade ray[rllib] +# pip install --user --upgrade pybullet +# pip install --user --upgrade stable-baselines3 + +cd /scratch/gobi1/$USER/gym-pybullet-drones/ +pip install --user --upgrade --verbose --editable . 
+ +if [ "$algo" = "a2c" ] || [ "$algo" = "ppo" ]; then + + python /scratch/gobi1/$USER/gym-pybullet-drones/experiments/learning/singleagent.py --env $env --algo $algo --obs $obs --act $act --cpu 4 >> $log_file # the script, with its standard output appended to gym_py_drones_job.log + +else + + python /scratch/gobi1/$USER/gym-pybullet-drones/experiments/learning/singleagent.py --env $env --algo $algo --obs $obs --act $act --cpu 1 >> $log_file # the script, with its standard output appended to gym_py_drones_job.log + +fi + +source deactivate diff --git a/gym_pybullet_drones/examples/learning/shared_constants.py b/gym_pybullet_drones/examples/learning/shared_constants.py new file mode 100644 index 000000000..f62086b9a --- /dev/null +++ b/gym_pybullet_drones/examples/learning/shared_constants.py @@ -0,0 +1,2 @@ +AGGR_PHY_STEPS = 5 +"""int: Aggregate PyBullet/physics steps within a single call to env.step().""" diff --git a/gym_pybullet_drones/examples/learning/singleagent.py b/gym_pybullet_drones/examples/learning/singleagent.py new file mode 100644 index 000000000..15ea0c9a3 --- /dev/null +++ b/gym_pybullet_drones/examples/learning/singleagent.py @@ -0,0 +1,283 @@ +"""Learning script for single agent problems. + +Agents are based on `stable_baselines3`'s implementation of A2C, PPO SAC, TD3, DDPG. + +Example +------- +To run the script, type in a terminal: + + $ python singleagent.py --env --algo --obs --act --cpu + +Notes +----- +Use: + + $ tensorboard --logdir ./results/save-----/tb/ + +To check the tensorboard results at: + + http://localhost:6006/ + +""" +import os +import time +from datetime import datetime +from sys import platform +import argparse +import subprocess +import numpy as np +import gym +import torch +from stable_baselines3.common.env_checker import check_env +from stable_baselines3.common.cmd_util import make_vec_env # Module cmd_util will be renamed to env_util https://github.com/DLR-RM/stable-baselines3/pull/197 +from stable_baselines3.common.vec_env import SubprocVecEnv, VecTransposeImage +from stable_baselines3.common.utils import set_random_seed +from stable_baselines3 import A2C +from stable_baselines3 import PPO +from stable_baselines3 import SAC +from stable_baselines3 import TD3 +from stable_baselines3 import DDPG +from stable_baselines3.common.policies import ActorCriticPolicy as a2cppoMlpPolicy +from stable_baselines3.common.policies import ActorCriticCnnPolicy as a2cppoCnnPolicy +from stable_baselines3.sac.policies import SACPolicy as sacMlpPolicy +from stable_baselines3.sac import CnnPolicy as sacCnnPolicy +from stable_baselines3.td3 import MlpPolicy as td3ddpgMlpPolicy +from stable_baselines3.td3 import CnnPolicy as td3ddpgCnnPolicy +from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback, StopTrainingOnRewardThreshold + +from gym_pybullet_drones.envs.single_agent_rl.TakeoffAviary import TakeoffAviary +from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary +from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary +from gym_pybullet_drones.envs.single_agent_rl.TuneAviary import TuneAviary +from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType + +import shared_constants + +EPISODE_REWARD_THRESHOLD = -0 # Upperbound: rewards are always negative, but non-zero +"""float: Reward threshold to halt the script.""" + +DEFAULT_ENV = 'hover' +DEFAULT_ALGO = 'ppo' +DEFAULT_OBS = ObservationType('kin') +DEFAULT_ACT = ActionType('one_d_rpm') 
+DEFAULT_CPU = 1 +DEFAULT_STEPS = 35000 +DEFAULT_OUTPUT_FOLDER = 'results' + +def run( + env=DEFAULT_ENV, + algo=DEFAULT_ALGO, + obs=DEFAULT_OBS, + act=DEFAULT_ACT, + cpu=DEFAULT_CPU, + steps=DEFAULT_STEPS, + output_folder=DEFAULT_OUTPUT_FOLDER +): + + #### Save directory ######################################## + filename = os.path.join(output_folder, 'save-'+env+'-'+algo+'-'+obs.value+'-'+act.value+'-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S")) + if not os.path.exists(filename): + os.makedirs(filename+'/') + + #### Print out current git commit hash ##################### + if (platform == "linux" or platform == "darwin") and ('GITHUB_ACTIONS' not in os.environ.keys()): + git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip() + with open(filename+'/git_commit.txt', 'w+') as f: + f.write(str(git_commit)) + + #### Warning ############################################### + if env == 'tune' and act != ActionType.TUN: + print("\n\n\n[WARNING] TuneAviary is intended for use with ActionType.TUN\n\n\n") + if act == ActionType.ONE_D_RPM or act == ActionType.ONE_D_DYN or act == ActionType.ONE_D_PID: + print("\n\n\n[WARNING] Simplified 1D problem for debugging purposes\n\n\n") + #### Errors ################################################ + if not env in ['takeoff', 'hover']: + print("[ERROR] 1D action space is only compatible with Takeoff and HoverAviary") + exit() + if act == ActionType.TUN and env != 'tune' : + print("[ERROR] ActionType.TUN is only compatible with TuneAviary") + exit() + if algo in ['sac', 'td3', 'ddpg'] and cpu!=1: + print("[ERROR] The selected algorithm does not support multiple environments") + exit() + + #### Uncomment to debug slurm scripts ###################### + # exit() + + env_name = env+"-aviary-v0" + sa_env_kwargs = dict(aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act) + # train_env = gym.make(env_name, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act) # single environment instead of a vectorized one + if env_name == "takeoff-aviary-v0": + train_env = make_vec_env(TakeoffAviary, + env_kwargs=sa_env_kwargs, + n_envs=cpu, + seed=0 + ) + if env_name == "hover-aviary-v0": + train_env = make_vec_env(HoverAviary, + env_kwargs=sa_env_kwargs, + n_envs=cpu, + seed=0 + ) + if env_name == "flythrugate-aviary-v0": + train_env = make_vec_env(FlyThruGateAviary, + env_kwargs=sa_env_kwargs, + n_envs=cpu, + seed=0 + ) + if env_name == "tune-aviary-v0": + train_env = make_vec_env(TuneAviary, + env_kwargs=sa_env_kwargs, + n_envs=cpu, + seed=0 + ) + print("[INFO] Action space:", train_env.action_space) + print("[INFO] Observation space:", train_env.observation_space) + # check_env(train_env, warn=True, skip_render_check=True) + + #### On-policy algorithms ################################## + onpolicy_kwargs = dict(activation_fn=torch.nn.ReLU, + net_arch=[512, 512, dict(vf=[256, 128], pi=[256, 128])] + ) # or None + if algo == 'a2c': + model = A2C(a2cppoMlpPolicy, + train_env, + policy_kwargs=onpolicy_kwargs, + tensorboard_log=filename+'/tb/', + verbose=1 + ) if obs == ObservationType.KIN else A2C(a2cppoCnnPolicy, + train_env, + policy_kwargs=onpolicy_kwargs, + tensorboard_log=filename+'/tb/', + verbose=1 + ) + if algo == 'ppo': + model = PPO(a2cppoMlpPolicy, + train_env, + policy_kwargs=onpolicy_kwargs, + tensorboard_log=filename+'/tb/', + verbose=1 + ) if obs == ObservationType.KIN else PPO(a2cppoCnnPolicy, + train_env, + policy_kwargs=onpolicy_kwargs, + tensorboard_log=filename+'/tb/', + verbose=1 + ) + + #### Off-policy 
+    #### Off-policy algorithms #################################
+    offpolicy_kwargs = dict(activation_fn=torch.nn.ReLU,
+                            net_arch=[512, 512, 256, 128]
+                            ) # or None # or dict(net_arch=dict(qf=[256, 128, 64, 32], pi=[256, 128, 64, 32]))
+    if algo == 'sac':
+        model = SAC(sacMlpPolicy,
+                    train_env,
+                    policy_kwargs=offpolicy_kwargs,
+                    tensorboard_log=filename+'/tb/',
+                    verbose=1
+                    ) if obs==ObservationType.KIN else SAC(sacCnnPolicy,
+                                                           train_env,
+                                                           policy_kwargs=offpolicy_kwargs,
+                                                           tensorboard_log=filename+'/tb/',
+                                                           verbose=1
+                                                           )
+    if algo == 'td3':
+        model = TD3(td3ddpgMlpPolicy,
+                    train_env,
+                    policy_kwargs=offpolicy_kwargs,
+                    tensorboard_log=filename+'/tb/',
+                    verbose=1
+                    ) if obs==ObservationType.KIN else TD3(td3ddpgCnnPolicy,
+                                                           train_env,
+                                                           policy_kwargs=offpolicy_kwargs,
+                                                           tensorboard_log=filename+'/tb/',
+                                                           verbose=1
+                                                           )
+    if algo == 'ddpg':
+        model = DDPG(td3ddpgMlpPolicy,
+                     train_env,
+                     policy_kwargs=offpolicy_kwargs,
+                     tensorboard_log=filename+'/tb/',
+                     verbose=1
+                     ) if obs==ObservationType.KIN else DDPG(td3ddpgCnnPolicy,
+                                                             train_env,
+                                                             policy_kwargs=offpolicy_kwargs,
+                                                             tensorboard_log=filename+'/tb/',
+                                                             verbose=1
+                                                             )
+
+    #### Create evaluation environment #########################
+    if obs == ObservationType.KIN:
+        eval_env = gym.make(env_name,
+                            aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
+                            obs=obs,
+                            act=act
+                            )
+    elif obs == ObservationType.RGB:
+        if env_name == "takeoff-aviary-v0":
+            eval_env = make_vec_env(TakeoffAviary,
+                                    env_kwargs=sa_env_kwargs,
+                                    n_envs=1,
+                                    seed=0
+                                    )
+        if env_name == "hover-aviary-v0":
+            eval_env = make_vec_env(HoverAviary,
+                                    env_kwargs=sa_env_kwargs,
+                                    n_envs=1,
+                                    seed=0
+                                    )
+        if env_name == "flythrugate-aviary-v0":
+            eval_env = make_vec_env(FlyThruGateAviary,
+                                    env_kwargs=sa_env_kwargs,
+                                    n_envs=1,
+                                    seed=0
+                                    )
+        if env_name == "tune-aviary-v0":
+            eval_env = make_vec_env(TuneAviary,
+                                    env_kwargs=sa_env_kwargs,
+                                    n_envs=1,
+                                    seed=0
+                                    )
+        eval_env = VecTransposeImage(eval_env)
+
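+    # The EvalCallback below periodically runs the current policy on eval_env,
+    # writes evaluations.npz and best_model.zip into the save directory, and
+    # (via callback_on_new_best) stops training early once the mean evaluation
+    # reward reaches EPISODE_REWARD_THRESHOLD. eval_freq is counted per
+    # vectorized environment, hence the division by the number of envs (cpu).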
+    #### Train the model #######################################
+    # checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=filename+'-logs/', name_prefix='rl_model')
+    callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=EPISODE_REWARD_THRESHOLD,
+                                                     verbose=1
+                                                     )
+    eval_callback = EvalCallback(eval_env,
+                                 callback_on_new_best=callback_on_best,
+                                 verbose=1,
+                                 best_model_save_path=filename+'/',
+                                 log_path=filename+'/',
+                                 eval_freq=int(2000/cpu),
+                                 deterministic=True,
+                                 render=False
+                                 )
+    model.learn(total_timesteps=steps, #int(1e12),
+                callback=eval_callback,
+                log_interval=100,
+                )
+
+    #### Save the model ########################################
+    model.save(filename+'/success_model.zip')
+    print(filename)
+
+    #### Print training progression ############################
+    with np.load(filename+'/evaluations.npz') as data:
+        for j in range(data['timesteps'].shape[0]):
+            print(str(data['timesteps'][j])+","+str(data['results'][j][0]))
+
+
+if __name__ == "__main__":
+    #### Define and parse (optional) arguments for the script ##
+    parser = argparse.ArgumentParser(description='Single agent reinforcement learning experiments script')
+    parser.add_argument('--env', default=DEFAULT_ENV, type=str, choices=['takeoff', 'hover', 'flythrugate', 'tune'], help='Task (default: hover)', metavar='')
+    parser.add_argument('--algo', default=DEFAULT_ALGO, type=str, choices=['a2c', 'ppo', 'sac', 'td3', 'ddpg'], help='RL agent (default: ppo)', metavar='')
+    parser.add_argument('--obs', default=DEFAULT_OBS, type=ObservationType, help='Observation space (default: kin)', metavar='')
+    parser.add_argument('--act', default=DEFAULT_ACT, type=ActionType, help='Action space (default: one_d_rpm)', metavar='')
+    parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='')
+    parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='')
+    parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
+    ARGS = parser.parse_args()
+
+    run(**vars(ARGS))
\ No newline at end of file
diff --git a/gym_pybullet_drones/examples/learning/test_singleagent.py b/gym_pybullet_drones/examples/learning/test_singleagent.py
new file mode 100644
index 000000000..dc7e8c033
--- /dev/null
+++ b/gym_pybullet_drones/examples/learning/test_singleagent.py
@@ -0,0 +1,141 @@
+"""Test script for single agent problems.
+
+This script runs the best model found by one of the executions of `singleagent.py`.
+
+Example
+-------
+To run the script, type in a terminal:
+
+    $ python test_singleagent.py --exp ./results/save-<env>-<algo>-<obs>-<act>-<time_date>
+
+"""
+import os
+import time
+from datetime import datetime
+import argparse
+import re
+import numpy as np
+import gym
+import torch
+from stable_baselines3.common.env_checker import check_env
+from stable_baselines3 import A2C
+from stable_baselines3 import PPO
+from stable_baselines3 import SAC
+from stable_baselines3 import TD3
+from stable_baselines3 import DDPG
+from stable_baselines3.common.policies import ActorCriticPolicy as a2cppoMlpPolicy
+from stable_baselines3.common.policies import ActorCriticCnnPolicy as a2cppoCnnPolicy
+from stable_baselines3.sac.policies import SACPolicy as sacMlpPolicy
+from stable_baselines3.sac import CnnPolicy as sacCnnPolicy
+from stable_baselines3.td3 import MlpPolicy as td3ddpgMlpPolicy
+from stable_baselines3.td3 import CnnPolicy as td3ddpgCnnPolicy
+from stable_baselines3.common.evaluation import evaluate_policy
+
+from gym_pybullet_drones.utils.utils import sync
+from gym_pybullet_drones.utils.Logger import Logger
+from gym_pybullet_drones.envs.single_agent_rl.TakeoffAviary import TakeoffAviary
+from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary
+from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary
+from gym_pybullet_drones.envs.single_agent_rl.TuneAviary import TuneAviary
+from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType
+from gym_pybullet_drones.utils.utils import sync, str2bool
+
+import shared_constants
+
+DEFAULT_GUI = True
+DEFAULT_PLOT = True
+DEFAULT_OUTPUT_FOLDER = 'results'
+
+def run(exp, gui=DEFAULT_GUI, plot=DEFAULT_PLOT, output_folder=DEFAULT_OUTPUT_FOLDER):
+    #### Load the model from file ##############################
+    algo = exp.split("-")[2]
+
+    if os.path.isfile(exp+'/success_model.zip'):
+        path = exp+'/success_model.zip'
+    elif os.path.isfile(exp+'/best_model.zip'):
+        path = exp+'/best_model.zip'
+    else:
+        print("[ERROR]: no model under the specified path", exp)
+        exit()
+    if algo == 'a2c':
+        model = A2C.load(path)
+    if algo == 'ppo':
+        model = PPO.load(path)
+    if algo == 'sac':
+        model = SAC.load(path)
+    if algo == 'td3':
+        model = TD3.load(path)
+    if algo == 'ddpg':
+        model = DDPG.load(path)
+
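+    # The splits above and below assume the folder name written by singleagent.py,
+    # i.e. 'save-<env>-<algo>-<obs>-<act>-<timestamp>'; for example (hypothetical run):
+    #   exp = './results/save-hover-ppo-kin-one_d_rpm-01.01.2023_12.00.00'
+    #   exp.split("-")[2] -> 'ppo', exp.split("-")[4] -> 'one_d_rpm'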
+    #### Parameters to recreate the environment ################
+    env_name = exp.split("-")[1]+"-aviary-v0"
+    OBS = ObservationType.KIN if exp.split("-")[3] == 'kin' else ObservationType.RGB
+
+    # Parse ActionType instance from file name
+    action_name = exp.split("-")[4]
+    ACT = [action for action in ActionType if action.value == action_name]
+    if len(ACT) != 1:
+        raise AssertionError("Result file could have gotten corrupted. Extracted action type does not match any of the existing ones.")
+    ACT = ACT.pop()
+
+    #### Evaluate the model ####################################
+    eval_env = gym.make(env_name,
+                        aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
+                        obs=OBS,
+                        act=ACT
+                        )
+    mean_reward, std_reward = evaluate_policy(model,
+                                              eval_env,
+                                              n_eval_episodes=10
+                                              )
+    print("\n\n\nMean reward ", mean_reward, " +- ", std_reward, "\n\n")
+
+    #### Show, record a video, and log the model's performance #
+    test_env = gym.make(env_name,
+                        gui=gui,
+                        record=False,
+                        aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
+                        obs=OBS,
+                        act=ACT
+                        )
+    logger = Logger(logging_freq_hz=int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS),
+                    num_drones=1,
+                    output_folder=output_folder
+                    )
+    obs = test_env.reset()
+    start = time.time()
+    for i in range(6*int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS)): # Up to 6''
+        action, _states = model.predict(obs,
+                                        deterministic=True # OPTIONAL 'deterministic=False'
+                                        )
+        obs, reward, done, info = test_env.step(action)
+        test_env.render()
+        if OBS==ObservationType.KIN:
+            logger.log(drone=0,
+                       timestamp=i/test_env.SIM_FREQ,
+                       state=np.hstack([obs[0:3], np.zeros(4), obs[3:15], np.resize(action, (4))]),
+                       control=np.zeros(12)
+                       )
+        sync(np.floor(i*test_env.AGGR_PHY_STEPS), start, test_env.TIMESTEP)
+        # if done: obs = test_env.reset() # OPTIONAL EPISODE HALT
+    test_env.close()
+    logger.save_as_csv("sa") # Optional CSV save
+    if plot:
+        logger.plot()
+
+    # with np.load(exp+'/evaluations.npz') as data:
+    #     print(data.files)
+    #     print(data['timesteps'])
+    #     print(data['results'])
+    #     print(data['ep_lengths'])
+
+if __name__ == "__main__":
+    #### Define and parse (optional) arguments for the script ##
+    parser = argparse.ArgumentParser(description='Single agent reinforcement learning test script')
+    parser.add_argument('--exp', type=str, help='The experiment folder written as ./results/save-<env>-<algo>-<obs>-<act>-<time_date>', metavar='')
+    parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='')
+    parser.add_argument('--plot', default=DEFAULT_PLOT, type=str2bool, help='Whether to plot the simulation results (default: True)', metavar='')
+    parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
+    ARGS = parser.parse_args()
+
+    run(**vars(ARGS))
\ No newline at end of file
diff --git a/gym_pybullet_drones/examples/velocity.py b/gym_pybullet_drones/examples/pid-velocity.py
similarity index 100%
rename from gym_pybullet_drones/examples/velocity.py
rename to gym_pybullet_drones/examples/pid-velocity.py
diff --git a/gym_pybullet_drones/examples/fly.py b/gym_pybullet_drones/examples/pid.py
similarity index 100%
rename from gym_pybullet_drones/examples/fly.py
rename to gym_pybullet_drones/examples/pid.py
diff --git a/pyproject.toml b/pyproject.toml
index d2a032ae5..7b1b1147a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gym-pybullet-drones"
-version = "0.0.3"
+version = "0.2.0"
 description = "PyBullet Gym environments for single and multi-agent reinforcement learning of quadcopter control"
 authors = ["Jacopo Panerati "]
 license = "MIT"
@@ -18,10 +18,10 @@ matplotlib = "^3.5"
 cycler = "^0.10"
 gym = "^0.21"
 pybullet = "^3.2"
-torch = "1.11.0"
-"ray[rllib]" = "1.9"
 stable-baselines3 = "1.5.0"
 scipy = "^1.8"
+torch = "1.11.0"
+"ray[rllib]" = "1.9"
 tensorboard = "^2.9"
 
 [tool.poetry.dev-dependencies]