From ec44b64483be9a4ad291c5095e8dc353198f34a6 Mon Sep 17 00:00:00 2001
From: Jacopo Panerati
Date: Mon, 20 Nov 2023 21:48:46 +0400
Subject: [PATCH] Tested learn.py with obs spaces and act spaces except one d
 pid

---
 gym_pybullet_drones/envs/BaseAviary.py | 19 ------------
 gym_pybullet_drones/examples/learn.py  | 43 +++++++++++++-------------
 2 files changed, 22 insertions(+), 40 deletions(-)

diff --git a/gym_pybullet_drones/envs/BaseAviary.py b/gym_pybullet_drones/envs/BaseAviary.py
index 70b0a40a4..8de25c03c 100755
--- a/gym_pybullet_drones/envs/BaseAviary.py
+++ b/gym_pybullet_drones/envs/BaseAviary.py
@@ -338,7 +338,6 @@ def step(self,
                                                           ) for i in range(self.NUM_DRONES)]
         #### Save, preprocess, and clip the action to the max. RPM #
         else:
-            self._saveLastAction(action)
             clipped_action = np.reshape(self._preprocessAction(action), (self.NUM_DRONES, 4))
         #### Repeat for as many as the aggregate physics steps #####
         for _ in range(self.PYB_STEPS_PER_CTRL):
@@ -466,7 +465,6 @@ def _housekeeping(self):
         self.GUI_INPUT_TEXT = -1*np.ones(self.NUM_DRONES)
         self.USE_GUI_RPM=False
         self.last_input_switch = 0
-        self.last_action = -1*np.ones((self.NUM_DRONES, 4))
         self.last_clipped_action = np.zeros((self.NUM_DRONES, 4))
         self.gui_input = np.zeros(4)
         #### Initialize the drones kinemaatic information ##########
@@ -916,23 +914,6 @@ def _normalizedActionToRPM(self,
 
     ################################################################################
 
-    def _saveLastAction(self,
-                        action
-                        ):
-        """Stores the most recent action into attribute `self.last_action`.
-
-        The last action can be used to compute aerodynamic effects.
-
-        Parameters
-        ----------
-        action : ndarray
-            Ndarray containing the current RPMs input for each drone.
-
-        """
-        self.last_action = np.reshape(action, (self.NUM_DRONES, 4))
-
-    ################################################################################
-
     def _showDroneLocalAxes(self,
                             nth_drone
                             ):
diff --git a/gym_pybullet_drones/examples/learn.py b/gym_pybullet_drones/examples/learn.py
index f80b1a7a2..fc9042cee 100644
--- a/gym_pybullet_drones/examples/learn.py
+++ b/gym_pybullet_drones/examples/learn.py
@@ -37,10 +37,10 @@
 DEFAULT_OUTPUT_FOLDER = 'results'
 DEFAULT_COLAB = False
 
-DEFAULT_OBS = ObservationType('kin')
-DEFAULT_ACT = ActionType('rpm')
-DEFAULT_AGENTS = 2
-DEFAULT_MA = True
+DEFAULT_OBS = ObservationType('kin') # 'kin' or 'rgb'
+DEFAULT_ACT = ActionType('vel') # 'rpm' or 'pid' or 'vel' or 'one_d_rpm' / TO BE FIXED: 'one_d_pid'
+DEFAULT_AGENTS = 3
+DEFAULT_MA = False
 
 def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB, record_video=DEFAULT_RECORD_VIDEO):
 
@@ -152,27 +152,28 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D
         obs2 = obs.squeeze()
         act2 = action.squeeze()
         print("Obs:", obs, "\tAction", action, "\tReward:", reward, "\tTerminated:", terminated, "\tTruncated:", truncated)
-        if not DEFAULT_MA:
-            logger.log(drone=0,
-                timestamp=i/test_env.CTRL_FREQ,
-                state=np.hstack([obs2[0:3],
-                                    np.zeros(4),
-                                    obs2[3:15],
-                                    act2
-                                    ]),
-                control=np.zeros(12)
-                )
-        else:
-            for d in range(DEFAULT_AGENTS):
-                logger.log(drone=d,
+        if DEFAULT_OBS == ObservationType.KIN:
+            if not DEFAULT_MA:
+                logger.log(drone=0,
                     timestamp=i/test_env.CTRL_FREQ,
-                    state=np.hstack([obs2[d][0:3],
+                    state=np.hstack([obs2[0:3],
                                         np.zeros(4),
-                                        obs2[d][3:15],
-                                        act2[d]
+                                        obs2[3:15],
+                                        act2
                                         ]),
                     control=np.zeros(12)
                     )
+            else:
+                for d in range(DEFAULT_AGENTS):
+                    logger.log(drone=d,
+                        timestamp=i/test_env.CTRL_FREQ,
+                        state=np.hstack([obs2[d][0:3],
+                                            np.zeros(4),
+                                            obs2[d][3:15],
+                                            act2[d]
+                                            ]),
+                        control=np.zeros(12)
+                        )
         test_env.render()
         print(terminated)
         sync(i, start, test_env.CTRL_TIMESTEP)
@@ -180,7 +181,7 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D
             obs = test_env.reset(seed=42, options={})
     test_env.close()
 
-    if plot:
+    if plot and DEFAULT_OBS == ObservationType.KIN:
         logger.plot()
 
 if __name__ == '__main__':