Minor updates

Federico-PizarroBejarano · Federico-PizarroBejarano · commit 5d254c508466 · 2024-10-07T15:12:06.000-04:00
diff --git a/.gitignore b/.gitignore
@@ -8,6 +8,8 @@ examples/pid/*data/
 #
 experiments/mpsc/temp-data/
 experiments/mpsc/unsafe_rl_temp_data/
+experiments/mpsc/models/rl_models/
+experiments/mpsc/results*/
 #
 results/
 z_docstring.py
diff --git a/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml b/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml
@@ -1,8 +1,8 @@
 algo: cpo
 algo_config:
   # Model args
-  hidden1: 128
-  hidden2: 128
+  hidden1: 256
+  hidden2: 256
 
   # Optim args
   discount_factor: 0.98
@@ -16,15 +16,15 @@ algo_config:
   cost_d: 0.0
 
   # Runner args
-  max_steps: 1000
-  num_epochs: 4000
-  value_epochs: 150
+  max_steps: 2000
+  num_epochs: 5000
+  value_epochs: 300
   eval_batch_size: 20
 
   # Misc
-  log_interval: 40
+  log_interval: 50
   save_interval: 0
   num_checkpoints: 0
-  eval_interval: 40
+  eval_interval: 50
   eval_save_best: True
   tensorboard: False
diff --git a/experiments/mpsc/plotting_results.py b/experiments/mpsc/plotting_results.py
@@ -12,8 +12,7 @@
 from safe_control_gym.safety_filters.mpsc.mpsc_utils import get_discrete_derivative, high_frequency_content
 from safe_control_gym.utils.plotting import load_from_logs
 
-plot = False
-save_figs = True
+plot = True  # If True, figures are shown interactively; if False, they are saved to disk instead
 
 U_EQs = {
     'cartpole': 0,
@@ -26,22 +25,23 @@
 
 
 def load_all_models(system, task, algo):
-    '''Loads the results of every MPSC cost function for a specific experiment with every algo.
+    '''Loads the pickled results of every model in ordered_models, across all seeds, for one experiment.
 
     Args:
-        system (str): The system to be controlled.
-        task (str): The task to be completed (either 'stab' or 'track').
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
 
     Returns:
         all_results (dict): A dictionary containing all the results.
     '''
 
     all_results = {}
 
-    for model in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/'):
+    for model in ordered_models:
         all_results[model] = []
-        for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'):
-            with open(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/{seed}.pkl', 'rb') as f:
+        for seed in os.listdir(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/'):
+            with open(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/{seed}', 'rb') as f:
                 all_results[model].append(pickle.load(f))
         consolidate_multiple_seeds(all_results, model)
 
@@ -497,21 +497,22 @@ def plot_model_comparisons(system, task, algo, data_extractor):
     '''Plots the constraint violations of every controller for a specific experiment.
 
     Args:
-        system (str): The system to be controlled.
-        task (str): The task to be completed (either 'stab' or 'track').
-        mpsc_cost_horizon (str): The cost horizon used by the smooth MPSC cost functions.
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
+        data_extractor (func): The function which extracts the desired data.
     '''
 
     all_results = load_all_models(system, task, algo)
 
     fig = plt.figure(figsize=(16.0, 10.0))
     ax = fig.add_subplot(111)
 
-    labels = sorted(os.listdir(f'./models/rl_models/{system}/{task}/{algo}/'))
+    labels = ordered_models
 
     data = []
 
-    for model in labels:
+    for model in ordered_models:
         exp_data = all_results[model]
         data.append(data_extractor(exp_data))
 
@@ -522,24 +523,71 @@ def plot_model_comparisons(system, task, algo, data_extractor):
     ax.set_xticks(x, labels, weight='bold', fontsize=15, rotation=30, ha='right')
 
     medianprops = dict(linestyle='--', linewidth=2.5, color='black')
-    bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels))
-
-    colors = {'mpsf_sr_pen_1': 'lightgreen', 'mpsf_sr_pen_10': 'limegreen', 'mpsf_sr_pen_100': 'forestgreen', 'mpsf_sr_pen_1000': 'darkgreen', 'none': 'cornflowerblue', 'none_cpen': 'plum'}
+    bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False)
 
     for patch, color in zip(bplot['boxes'], colors.values()):
         patch.set_facecolor(color)
 
     fig.tight_layout()
 
-    if data_extractor != extract_reward_cert:
-        ax.set_ylim(ymin=0)
     ax.yaxis.grid(True)
 
     if plot is True:
         plt.show()
-    if save_figs:
+    else:
         image_suffix = data_extractor.__name__.replace('extract_', '')
-        fig.savefig(f'./results_mpsc/{system}/{task}/{algo}/graphs/{system}_{task}_{image_suffix}.png', dpi=300)
+        fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300)
+    plt.close()
+
+
+def plot_step_time(system, task, algo):
+    '''Plots the training time per step of every model for a specific experiment.
+
+    Args:
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
+    '''
+
+    all_results = {}
+    for model in ordered_models:
+        all_results[model] = []
+        for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'):
+            all_results[model].append(load_from_logs(f'./models/rl_models/{system}/{task}/{algo}/{model}/{seed}/logs/'))
+
+    fig = plt.figure(figsize=(16.0, 10.0))
+    ax = fig.add_subplot(111)
+
+    labels = ordered_models
+
+    data = []
+
+    for model in ordered_models:
+        datum = np.array([values['stat/step_time'][3] for values in all_results[model]]).flatten()
+        data.append(datum)
+
+    ylabel = 'Training Time per Step [ms]'
+    ax.set_ylabel(ylabel, weight='bold', fontsize=45, labelpad=10)
+
+    x = np.arange(1, len(labels) + 1)
+    ax.set_xticks(x, labels, weight='bold', fontsize=15, rotation=30, ha='right')
+
+    medianprops = dict(linestyle='--', linewidth=2.5, color='black')
+    bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False)
+
+    for patch, color in zip(bplot['boxes'], colors.values()):
+        patch.set_facecolor(color)
+
+    fig.tight_layout()
+
+    ax.set_ylim(ymin=0)
+    ax.yaxis.grid(True)
+
+    if plot is True:
+        plt.show()
+    else:
+        image_suffix = 'step_time'
+        fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300)
     plt.close()
 
 
@@ -571,43 +619,40 @@ def plot_all_logs(system, task, algo):
     '''Plots comparative plots of all the logs.
 
     Args:
-        system (str): The system to be controlled.
-        task (str): The task to be completed (either 'stab' or 'track').
-        mpsc_cost_horizon (str): The cost horizon used by the smooth MPSC cost functions.
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
     '''
     all_results = {}
 
-    for model in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/'):
+    for model in ordered_models:
         all_results[model] = []
         for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'):
             all_results[model].append(load_from_logs(f'./models/rl_models/{system}/{task}/{algo}/{model}/{seed}/logs/'))
 
-    for key in all_results['none'][0].keys():
-        plot_log(system, task, algo, key, all_results)
+    for key in all_results[ordered_models[0]][0].keys():
+        if key == 'stat_eval/ep_return':
+            plot_log(key, all_results)
+        if key == 'stat/constraint_violation':
+            plot_log(key, all_results)
 
 
-def plot_log(system, task, algo, key, all_results):
+def plot_log(key, all_results):
     '''Plots a comparative plot of the log 'key'.
 
     Args:
-        system (str): The system to be controlled.
-        task (str): The task to be completed (either 'stab' or 'track').
-        mpsc_cost_horizon (str): The cost horizon used by the smooth MPSC cost functions.
         key (str): The name of the log to be plotted.
         all_results (dict): A dictionary of all the logged results for all models.
     '''
     fig = plt.figure(figsize=(16.0, 10.0))
     ax = fig.add_subplot(111)
 
-    labels = sorted(all_results.keys())
-    labels = [label for label in labels if '_es' not in label]
+    labels = ordered_models
 
-    colors = {'mpsf_sr_pen_1': 'lightgreen', 'mpsf_sr_pen_10': 'limegreen', 'mpsf_sr_pen_100': 'forestgreen', 'mpsf_sr_pen_1000': 'darkgreen', 'none': 'cornflowerblue', 'none_cpen': 'plum'}
-
-    for model in labels:
+    for model, label in zip(ordered_models, labels):
         x = all_results[model][0][key][1] / 1000
         all_data = np.array([values[key][3] for values in all_results[model]])
-        ax.plot(x, np.mean(all_data, axis=0), label=model, color=colors[model])
+        ax.plot(x, np.mean(all_data, axis=0), label=label, color=colors[model])
         ax.fill_between(x, np.min(all_data, axis=0), np.max(all_data, axis=0), alpha=0.3, edgecolor=colors[model], facecolor=colors[model])
 
     ax.set_ylabel(key, weight='bold', fontsize=45, labelpad=10)
@@ -619,14 +664,25 @@ def plot_log(system, task, algo, key, all_results):
 
     if plot is True:
         plt.show()
-    if save_figs:
+    else:
         image_suffix = key.replace('/', '__')
-        fig.savefig(f'./results_mpsc/{system}/{task}/{algo}/graphs/{system}_{task}_{image_suffix}.png', dpi=300)
+        fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300)
     plt.close()
 
 
 if __name__ == '__main__':
-    ordered_costs = ['one_step', 'regularized', 'precomputed']
+    ordered_models = ['none', 'none_cpen_0.01', 'none_cpen_0.1', 'none_cpen_1', 'mpsf_sr_pen_0.1', 'mpsf_sr_pen_1', 'mpsf_sr_pen_10', 'mpsf_sr_pen_100']
+
+    colors = {
+        'none': 'cornflowerblue',
+        'none_cpen_0.01': 'plum',
+        'none_cpen_0.1': 'mediumorchid',
+        'none_cpen_1': 'darkorchid',
+        'mpsf_sr_pen_0.1': 'lightgreen',
+        'mpsf_sr_pen_1': 'limegreen',
+        'mpsf_sr_pen_10': 'forestgreen',
+        'mpsf_sr_pen_100': 'darkgreen',
+    }
 
     def extract_rate_of_change_of_inputs(results_data, certified=True):
         return extract_rate_of_change(results_data, certified, order=1, mode='input')
@@ -682,6 +738,7 @@ def extract_length_uncert(results_data, certified=False):
         algo_name = sys.argv[3]
 
     plot_all_logs(system_name, task_name, algo_name)
+    plot_step_time(system_name, task_name, algo_name)
     plot_model_comparisons(system_name, task_name, algo_name, extract_magnitude_of_corrections)
     plot_model_comparisons(system_name, task_name, algo_name, extract_percent_magnitude_of_corrections)
     plot_model_comparisons(system_name, task_name, algo_name, extract_max_correction)
diff --git a/experiments/mpsc/train_all_models.sh b/experiments/mpsc/train_all_models.sh
@@ -2,13 +2,21 @@
 for SYS in quadrotor_3D; do
     for ALGO in ppo; do
         for TASK in track; do
-            for SEED in 42 62 821 99 4077; do # 1102 1014 14 960406 2031; do
-                sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 1 $SEED #mpsf_sr_pen_1
-                sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 10 $SEED #mpsf_sr_pen_10
-                sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 100 $SEED #mpsf_sr_pen_100
-                sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 1000 $SEED #mpsf_sr_pen_1000
-                sbatch train_model.sbatch none False False $SYS $TASK $ALGO False False $SEED #none
-                sbatch train_model.sbatch none False False $SYS $TASK $ALGO True False $SEED #none_cpen
+            for SEED in 42 62 821 99 4077; do
+                # MPSF Ablation
+                ./train_model.sbatch none False False $SYS $TASK $ALGO False False $SEED #none
+                ./train_model.sbatch none False True  $SYS $TASK $ALGO False 1     $SEED #none_pen_1
+                ./train_model.sbatch none True  False $SYS $TASK $ALGO False False $SEED #none_sr
+                ./train_model.sbatch none True  True  $SYS $TASK $ALGO False 1     $SEED #none_sr_pen_1
+                ./train_model.sbatch mpsf False False $SYS $TASK $ALGO False False $SEED #mpsf
+                ./train_model.sbatch mpsf False True  $SYS $TASK $ALGO False 1     $SEED #mpsf_pen_1
+                ./train_model.sbatch mpsf True  False $SYS $TASK $ALGO False False $SEED #mpsf_sr
+                ./train_model.sbatch mpsf True  True  $SYS $TASK $ALGO False 1     $SEED #mpsf_sr_pen_1
+
+                # Constr Pen
+                ./train_model.sbatch none False False $SYS $TASK $ALGO True  0.01  $SEED #none_cpen_0.01
+                ./train_model.sbatch none False False $SYS $TASK $ALGO True  0.1   $SEED #none_cpen_0.1
+                ./train_model.sbatch none False False $SYS $TASK $ALGO True  1     $SEED #none_cpen_1
             done
         done
     done
diff --git a/experiments/mpsc/train_model.sbatch b/experiments/mpsc/train_model.sbatch
@@ -70,8 +70,10 @@ fi
 
 if [ "$8" = False ]; then
     SF_PEN_TAG=''
+    CONSTR_PEN_VAL=0
 else
     SF_PEN_TAG="_$8"
+    CONSTR_PEN_VAL=$8
 fi
 
 if [ -z "$9" ]; then
@@ -103,6 +105,7 @@ python3 train_rl.py \
     --kv_overrides \
         task_config.init_state=None \
         task_config.use_constraint_penalty=${CONSTR_PEN} \
+        task_config.constraint_penalty=${CONSTR_PEN_VAL} \
         sf_config.cost_function=${MPSC_COST} \
         sf_config.mpsc_cost_horizon=${MPSC_COST_HORIZON} \
         sf_config.decay_factor=${DECAY_FACTOR} \
@@ -116,4 +119,3 @@ python3 train_rl.py \
         sf_config.seed=${SEED} \
 
 ./mpsc_experiment.sh $TAG $SYS $TASK $ALGO $SEED
-# python plotting_results.py $SYS $TASK $ALGO
diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py
diff --git a/safe_control_gym/envs/benchmark_env.py b/safe_control_gym/envs/benchmark_env.py

Original file line number	Diff line number	Diff line change
`@@ -8,6 +8,8 @@ examples/pid/*data/`
`8`	`8`	`#`
`9`	`9`	`experiments/mpsc/temp-data/`
`10`	`10`	`experiments/mpsc/unsafe_rl_temp_data/`
	`11`	`+experiments/mpsc/models/rl_models/`
	`12`	`+experiments/mpsc/results*/`
`11`	`13`	`#`
`12`	`14`	`results/`
`13`	`15`	`z_docstring.py`