araffin · kalifou · Mar 14, 2019 · Mar 14, 2019 · Mar 15, 2019 · Mar 15, 2019
diff --git a/README.md b/README.md
@@ -43,6 +43,13 @@ python -m rl_baselines.train --algo rl_algo --env env1 --log-dir logs/ --srl-mod
 
 To use the robot's position as input instead of pixels, just pass `--srl-model ground_truth` instead of `--srl-model raw_pixels`
 
+To perform a cross evaluation of the different SRL models, run the following command in a terminal:
+
+```
+python -m rl_baselines.pipeline_cross --algo ppo2 --log-dir logs/ --srl-model srl_combination ground_truth --num-iteration 5 --num-timesteps 1000000 --task cc sqc sc --srl-config-file config/srl_models1.yaml config/srl_models2.yaml config/srl_models3.yaml
+```
+This will write the learning results to the `logs/` directory given by `--log-dir`.
+
 
 ## Installation
 
@@ -191,6 +198,11 @@ If you have troubles installing mpi4py, make sure you the following installed:
 sudo apt-get install libopenmpi-dev openmpi-bin openmpi-doc
 ```
 
+If you have trouble building the wheel for ```atari```, you can fix that by running:
+```
+sudo apt-get install cmake libz-dev
+```
+
 ## Known issues
 
 The inverse kinematics function has trouble finding a solution when the arm is fully straight and the arm must bend to reach the requested point.

diff --git a/config/srl_models_circular.yaml b/config/srl_models_circular.yaml
@@ -0,0 +1,8 @@
+
+OmnirobotEnv-v0:
+  # Base path to SRL log folder
+  # log_folder: srl_zoo/logs/Omnibot_random_simple/
+  log_folder: srl_zoo/logs/Omnibot_circular/
+  autoencoder: 19-02-04_23h27_22_custom_cnn_ST_DIM200_autoencoder_reward_inverse_forward/srl_model.pth
+
+
diff --git a/config/srl_models_escape.yaml b/config/srl_models_escape.yaml
@@ -0,0 +1,9 @@
+
+OmnirobotEnv-v0:
+  # Base path to SRL log folder
+  # log_folder: srl_zoo/logs/escape_agent/
+  log_folder: srl_zoo/logs/escape_agent/
+  autoencoder: 19-02-04_23h27_22_custom_cnn_ST_DIM200_autoencoder_reward_inverse_forward/srl_model.pth
+  srl_combination: 19-06-03_18h38_59_custom_cnn_ST_DIM200_autoencoder_inverse/srl_model.pth
+
+
diff --git a/config/srl_models_merged.yaml b/config/srl_models_merged.yaml
@@ -0,0 +1,8 @@
+
+OmnirobotEnv-v0:
+  # Base path to SRL log folder
+  # log_folder: srl_zoo/logs/Omnibot_random_simple/
+  log_folder: srl_zoo/logs/merge_CC_SC/
+  autoencoder: 19-02-04_23h27_22_custom_cnn_ST_DIM200_autoencoder_reward_inverse_forward/srl_model.pth
+
+
diff --git a/config/srl_models_simple.yaml b/config/srl_models_simple.yaml
@@ -0,0 +1,8 @@
+
+OmnirobotEnv-v0:
+  # Base path to SRL log folder
+  # log_folder: srl_zoo/logs/Omnibot_random_simple/
+  log_folder: srl_zoo/logs/Omnibot_random_simple/
+  autoencoder: 19-02-04_23h27_22_custom_cnn_ST_DIM200_autoencoder_reward_inverse_forward/srl_model.pth
+
+
diff --git a/environments/dataset_generator.py b/environments/dataset_generator.py
diff --git a/environments/dataset_fusioner.py → environments/dataset_merger.py b/environments/dataset_fusioner.py → environments/dataset_merger.py
@@ -8,11 +8,23 @@
 import numpy as np
 from tqdm import tqdm
 
+# List of all possible labels identifying a task,
+#   for experiments in Continual Learning scenarios.
+CONTINUAL_LEARNING_LABELS = ['CC', 'SC', 'EC', 'SQC', 'ESC']
+CL_LABEL_KEY = "continual_learning_label"
+
 
 def main():
     parser = argparse.ArgumentParser(description='Dataset Manipulator: useful to merge two datasets by concatenating '
                                                  + 'episodes. PS: Deleting sources after merging into the destination '
                                                  + 'folder.')
+    parser.add_argument('--continual-learning-labels', type=str, nargs=2, metavar=('label_1', 'label_2'),
+                        default=argparse.SUPPRESS, help='Labels for the continual learning RL distillation task.')
+    parser.add_argument('-f', '--force', action='store_true', default=False,
+                        help='Force the merge, even if it overrides something else,' 
+                             ' including the destination if it exist')
+    parser.add_argument('-rm', '--remove', action='store_true', default=False,
+                        help='Remove the original data set.')
     group = parser.add_mutually_exclusive_group()
     group.add_argument('--merge', type=str, nargs=3, metavar=('source_1', 'source_2', 'destination'),
                        default=argparse.SUPPRESS,
@@ -23,28 +35,44 @@ def main():
     if 'merge' in args:
         # let make sure everything is in order
         assert os.path.exists(args.merge[0]), "Error: dataset '{}' could not be found".format(args.merge[0])
-        assert (not os.path.exists(args.merge[2])), \
-            "Error: dataset '{}' already exists, cannot rename '{}' to '{}'".format(args.merge[2], args.merge[0],
-                                                                                    args.merge[2])
+        assert os.path.exists(args.merge[1]), "Error: dataset '{}' could not be found".format(args.merge[1])
+
+        # If the destination already exists, delete it (only when --force is given) for the convenience of updating the student's policy
+        if os.path.exists(args.merge[2]) or os.path.exists(args.merge[2] + '/'):
+            assert args.force, "Error: destination directory '{}' already exists".format(args.merge[2])
+            shutil.rmtree(args.merge[2])
+
+        if 'continual_learning_labels' in args:
+            assert args.continual_learning_labels[0] in CONTINUAL_LEARNING_LABELS \
+                   and args.continual_learning_labels[1] in CONTINUAL_LEARNING_LABELS, \
+                   "Please specify a valid Continual learning label to each dataset to be used for RL distillation !"
+
         # create the output
         os.mkdir(args.merge[2])
 
         # copy files from first source
-        os.rename(args.merge[0] + "/dataset_config.json", args.merge[2] + "/dataset_config.json")
-        os.rename(args.merge[0] + "/env_globals.json", args.merge[2] + "/env_globals.json")
-
+        shutil.copy2(args.merge[0] + "/dataset_config.json", args.merge[2] + "/dataset_config.json")
+        shutil.copy2(args.merge[0] + "/env_globals.json", args.merge[2] + "/env_globals.json")
+        record = ''
         for record in sorted(glob.glob(args.merge[0] + "/record_[0-9]*/*")):
             s = args.merge[2] + "/" + record.split("/")[-2] + '/' + record.split("/")[-1]
-            os.renames(record, s)
-
+            try:
+                shutil.copy2(record, s)
+            except FileNotFoundError:  # no folders named so, we should create it first
+                os.mkdir(os.path.dirname(s))
+                shutil.copy2(record, s)
         num_episode_dataset_1 = int(record.split("/")[-2][7:]) + 1
 
         # copy files from second source
         for record in sorted(glob.glob(args.merge[1] + "/record_[0-9]*/*")):
             episode = str(num_episode_dataset_1 + int(record.split("/")[-2][7:]))
             new_episode = record.split("/")[-2][:-len(episode)] + episode
             s = args.merge[2] + "/" + new_episode + '/' + record.split("/")[-1]
-            os.renames(record, s)
+            try:
+                shutil.copy2(record, s)
+            except FileNotFoundError:  # no folders named so, we should create it first
+                os.mkdir(os.path.dirname(s))
+                shutil.copy2(record, s)
         num_episode_dataset_2 = int(record.split("/")[-2][7:]) + 1
 
         # load and correct ground_truth
@@ -101,20 +129,40 @@ def main():
         preprocessed_load = np.load(args.merge[0] + "/preprocessed_data.npz")
         preprocessed_load_2 = np.load(args.merge[1] + "/preprocessed_data.npz")
 
-        for prepro_load in [preprocessed_load, preprocessed_load_2]:
+        dataset_1_size = preprocessed_load["actions"].shape[0]
+        dataset_2_size = preprocessed_load_2["actions"].shape[0]
+
+        # Concatenating additional information: indices of episode start, action probabilities, CL labels...
+        for idx, prepro_load in enumerate([preprocessed_load, preprocessed_load_2]):
             for arr in prepro_load.files:
                 pr_arr = prepro_load[arr]
-                preprocessed[arr] = np.concatenate((preprocessed.get(arr, []), pr_arr), axis=0)
+
                 if arr == "episode_starts":
-                    preprocessed[arr] = preprocessed[arr].astype(bool)
+                    to_class = bool
+                elif arr == "actions_proba" or arr == "rewards":
+                    to_class = float
+                else:
+                    to_class = int
+                if preprocessed.get(arr, None) is None:
+                    preprocessed[arr] = pr_arr.astype(to_class)
+                else:
+                    preprocessed[arr] = np.concatenate((preprocessed[arr].astype(to_class),
+                                                        pr_arr.astype(to_class)), axis=0)
+            if 'continual_learning_labels' in args:
+                if preprocessed.get(CL_LABEL_KEY, None) is None:
+                    preprocessed[CL_LABEL_KEY] = \
+                        np.array([args.continual_learning_labels[idx] for _ in range(dataset_1_size)])
                 else:
-                    preprocessed[arr] = preprocessed[arr].astype(int)
+                    preprocessed[CL_LABEL_KEY] = \
+                        np.concatenate((preprocessed[CL_LABEL_KEY], np.array([args.continual_learning_labels[idx]
+                                                                              for _ in range(dataset_2_size)])), axis=0)
 
         np.savez(args.merge[2] + "/preprocessed_data.npz", ** preprocessed)
 
         # remove the old folders
-        shutil.rmtree(args.merge[0])
-        shutil.rmtree(args.merge[1])
+        if args.remove:
+            shutil.rmtree(args.merge[0])
+            shutil.rmtree(args.merge[1])
 
 
 if __name__ == '__main__':