
Commit

small updates to configs
svsawant committed Jun 14, 2024
1 parent e4b2fa8 commit 003d09b
Showing 56 changed files with 1,222 additions and 2,951 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -9,7 +9,7 @@ examples/pid/*data/
results/
z_docstring.py
TODOs.md
-#
+#
hpo_study*/
hp_study*/
comparisons/
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -9,7 +9,7 @@

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
-rev: v4.5.0
+rev: v4.6.0
hooks:
- id: check-ast
- id: check-yaml
@@ -48,4 +48,4 @@
rand_data_selection: false
terminate_train_on_done: True
terminate_test_on_done: True
-parallel: True
+parallel: True
@@ -70,4 +70,4 @@ task_config:
init_z_dot: 0.0
init_theta: 0.0
init_theta_dot: 0.0
-verbose: false
+verbose: false
@@ -48,4 +48,4 @@
rand_data_selection: false
terminate_train_on_done: True
terminate_test_on_done: True
-parallel: True
+parallel: True
@@ -63,4 +63,4 @@ task_config:
init_z: 1
init_z_dot: 0.0
init_theta: 0.0
-verbose: false
+verbose: false
2 changes: 1 addition & 1 deletion examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh
@@ -45,7 +45,7 @@ done
done

# 20 training unseen seeds that are unseen during hpo (hpo only saw seeds in [0, 10000])
-seeds=(22403 84244 98825 40417 58454 47838 56715 77833 19880 59009
+seeds=(22403 84244 98825 40417 58454 47838 56715 77833 19880 59009
47722 81354 63825 13296 10779 98122 86221 89144 35192 24759)

for seed in "${seeds[@]}"; do
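The hold-out seeds above are deliberately drawn from outside [0, 10000], the range the HPO runs sampled from. As an illustration only (the hard-coded list above is what the script actually uses, and this is not how it was generated), a disjoint evaluation seed set could be drawn like this:

    import numpy as np

    # Draw 20 evaluation seeds strictly above the [0, 10000] range used during HPO,
    # so evaluation never reuses a seed the tuner has already seen.
    rng = np.random.default_rng(0)
    eval_seeds = rng.choice(np.arange(10001, 100000), size=20, replace=False)
    print(eval_seeds.tolist())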
4 changes: 2 additions & 2 deletions examples/hpo/gp_mpc/gp_mpc_hpo.sh
@@ -6,7 +6,7 @@
# 2. Remove or backup the database if needed.
# 3. Create a screen session `screen`, and detach it `Ctrl+a d`.
# 4. Run this script by giving experiment name as the first arg. and the seed as the second.
-# 5. If you want to kill them, run `pkill -f "python ./experiments/comparisons/gpmpc/gpmpc_experiment.py"`.
+# 5. If you want to kill them, run `pkill -f "python ./experiments/comparisons/gpmpc/gpmpc_experiment.py"`.
#####################

cd ~/safe-control-gym
@@ -117,4 +117,4 @@ echo "backing up the database"
mysqldump --no-tablespaces -u optuna gp_mpc_hpo > gp_mpc_hpo.sql
mv gp_mpc_hpo.sql ./examples/hpo/gp_mpc/hpo_study_${sampler}_${sys}/run${experiment_name}/gp_mpc_hpo.sql
# remove the database
-python ./safe_control_gym/hyperparameters/database.py --func drop --tag gp_mpc_hpo
+python ./safe_control_gym/hyperparameters/database.py --func drop --tag gp_mpc_hpo
2 changes: 1 addition & 1 deletion examples/hpo/gp_mpc/main.sh
@@ -18,4 +18,4 @@ bash examples/hpo/gp_mpc/gp_mpc_hpo.sh ${run} $((run)) ${sampler} ${localOrHost}
done

# TODO: eval
-bash examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh ${localOrHost} ${sys} ${task} ${sampler}
+bash examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh ${localOrHost} ${sys} ${task} ${sampler}
10 changes: 4 additions & 6 deletions examples/hpo/hpo_experiment.py
@@ -4,15 +4,13 @@
import os
from functools import partial

-import yaml
-
import matplotlib.pyplot as plt
import numpy as np
+import yaml

from safe_control_gym.envs.benchmark_env import Environment, Task
-
-from safe_control_gym.hyperparameters.hpo import HPO
from safe_control_gym.experiments.base_experiment import BaseExperiment
+from safe_control_gym.hyperparameters.hpo import HPO
from safe_control_gym.utils.configuration import ConfigFactory
from safe_control_gym.utils.registration import make
from safe_control_gym.utils.utils import set_device_from_config, set_dir_from_config, set_seed_from_config
@@ -123,7 +121,7 @@ def train(config):
graph1_2 = 9
graph3_1 = 0
graph3_2 = 4

if config.task_config.quad_type != 4:
_, ax = plt.subplots()
ax.plot(results['obs'][0][:, graph1_1], results['obs'][0][:, graph1_2], 'r--', label='Agent Trajectory')
@@ -176,7 +174,7 @@ def train(config):
with open(os.path.join(config.output_dir, 'metrics.pkl'), 'wb') as f:
import pickle
pickle.dump(metrics, f)

return eval_env.X_GOAL, results, metrics


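The training branch above pickles the computed metrics to metrics.pkl inside the run's output directory. A minimal sketch for loading that file back for inspection (the output path is an assumption based on the --output_dir used elsewhere in these examples; the object's structure is whatever the experiment stored):

    import os
    import pickle

    output_dir = './examples/hpo/results'  # assumed; point this at your run's --output_dir
    with open(os.path.join(output_dir, 'metrics.pkl'), 'rb') as f:
        metrics = pickle.load(f)
    # The exact contents depend on what the experiment computed for this run.
    print(type(metrics))
    print(metrics)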
2 changes: 1 addition & 1 deletion examples/hpo/rl/main.sh
@@ -19,4 +19,4 @@ bash examples/hpo/rl/rl_hpo.sh ${run} $((run+6)) ${sampler} ${localOrHost} ${sys
done

# eval
-bash examples/hpo/rl/rl_hp_evaluation.sh ${localOrHost} ${algo} ${sys} ${task} ${sampler}
+bash examples/hpo/rl/rl_hp_evaluation.sh ${localOrHost} ${algo} ${sys} ${task} ${sampler}
2 changes: 1 addition & 1 deletion examples/hpo/rl/quadrotor_2D.sh
@@ -30,4 +30,4 @@ python ./examples/hpo/hpo_experiment.py \
--task quadrotor \
--overrides ./examples/hpo/rl/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml \
./examples/hpo/rl/sac/config_overrides/quadrotor_2D/sac_quadrotor_2D_.yaml \
---output_dir ./examples/hpo/results --n_episodes 10 --tag 2D --opt_hps '' --seed 6 --use_gpu True
+--output_dir ./examples/hpo/results --n_episodes 10 --tag 2D --opt_hps '' --seed 6 --use_gpu True
2 changes: 1 addition & 1 deletion examples/hpo/rl/rl_hp_evaluation.sh
@@ -45,7 +45,7 @@ done
done

# 20 training unseen seeds that are unseen during hpo (hpo only saw seeds in [0, 10000])
-seeds=(22403 84244 98825 40417 58454 47838 56715 77833 19880 59009
+seeds=(22403 84244 98825 40417 58454 47838 56715 77833 19880 59009
47722 81354 63825 13296 10779 98122 86221 89144 35192 24759)

for seed in "${seeds[@]}"; do
4 changes: 2 additions & 2 deletions examples/hpo/rl/rl_hpo.sh
@@ -6,7 +6,7 @@
# 2. Remove or backup the database if needed.
# 3. Create a screen session `screen`, and detach it `Ctrl+a d`.
# 4. Run this script by giving experiment name as the first arg. and the seed as the second.
-# 5. If you want to kill them, run `pkill -f "python ./experiments/comparisons/gpmpc/gpmpc_experiment.py"`.
+# 5. If you want to kill them, run `pkill -f "python ./experiments/comparisons/gpmpc/gpmpc_experiment.py"`.
#####################

cd ~/safe-control-gym
@@ -118,4 +118,4 @@ echo "backing up the database"
mysqldump --no-tablespaces -u optuna ${algo}_hpo > ${algo}_hpo.sql
mv ${algo}_hpo.sql ./examples/hpo/rl/${algo}/hpo_study_${sampler}_${sys}/run${experiment_name}/${algo}_hpo.sql
# remove the database
-python ./safe_control_gym/hyperparameters/database.py --func drop --tag ${algo}_hpo
+python ./safe_control_gym/hyperparameters/database.py --func drop --tag ${algo}_hpo
1 change: 0 additions & 1 deletion examples/pid/pid_experiment.py
@@ -9,7 +9,6 @@
import pybullet as p

from safe_control_gym.envs.benchmark_env import Environment, Task
-
from safe_control_gym.experiments.base_experiment import BaseExperiment
from safe_control_gym.utils.configuration import ConfigFactory
from safe_control_gym.utils.registration import make
6 changes: 3 additions & 3 deletions examples/rl/config_overrides/cartpole/cartpole_stab.yaml
@@ -50,7 +50,7 @@ task_config:
# RL Reward
rew_state_weight: [1, 0.1, 1, 0.1]
rew_act_weight: 0.1
-rew_exponential: True
+rew_exponential: False

# Disturbances
disturbances:
@@ -62,8 +62,8 @@ task_config:
constraints:
- constraint_form: default_constraint
constrained_variable: state
-upper_bounds: [10, 10, 0.2, 0.2]
-lower_bounds: [-10, -10, -0.2, -0.2]
+upper_bounds: [5.0, 10.0, 0.5, 2.0]
+lower_bounds: [-5.0, -10.0, -0.5, -2.0]
- constraint_form: default_constraint
constrained_variable: input
upper_bounds: [10]
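The override above tightens the box constraint on the cartpole state. As a rough, generic sketch of what an element-wise state box amounts to (this is not safe-control-gym's constraint class, and the [x, x_dot, theta, theta_dot] ordering is an assumption):

    import numpy as np

    # Bounds taken from the updated config.
    upper = np.array([5.0, 10.0, 0.5, 2.0])
    lower = np.array([-5.0, -10.0, -0.5, -2.0])

    def violates_box(state):
        """Return True if any state dimension leaves the [lower, upper] box."""
        state = np.asarray(state, dtype=float)
        return bool(np.any(state > upper) or np.any(state < lower))

    print(violates_box([0.0, 0.0, 0.1, 0.0]))  # False: inside the box
    print(violates_box([0.0, 0.0, 0.6, 0.0]))  # True: theta exceeds 0.5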
39 changes: 39 additions & 0 deletions examples/rl/config_overrides/cartpole/ddpg_cartpole.yaml
@@ -0,0 +1,39 @@
algo: td3
algo_config:
# model args
hidden_dim: 128
activation: 'relu'

# loss args
gamma: 0.98
tau: 0.01

# noise args
random_process:
func: OrnsteinUhlenbeckProcess
std:
func: LinearSchedule
args: 0.2

# optim args
train_interval: 100
train_batch_size: 64
actor_lr: 0.003
critic_lr: 0.003

# runner args
max_env_steps: 10000
warm_up_steps: 50
rollout_batch_size: 4
num_workers: 1
max_buffer_size: 10000
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 500
save_interval: 0
num_checkpoints: 0
eval_interval: 500
eval_save_best: False
tensorboard: False
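This new config asks for Ornstein-Uhlenbeck exploration noise with a (linearly scheduled) std of 0.2. For reference, a textbook OU process looks like the sketch below; the theta and dt values are generic defaults, not something this YAML specifies, and this is not necessarily the class the repo instantiates:

    import numpy as np

    class OUNoise:
        """Ornstein-Uhlenbeck process: temporally correlated exploration noise."""

        def __init__(self, dim, mu=0.0, theta=0.15, sigma=0.2, dt=1.0, seed=0):
            self.mu, self.theta, self.sigma, self.dt = mu, theta, sigma, dt
            self.x = np.full(dim, mu, dtype=float)
            self.rng = np.random.default_rng(seed)

        def sample(self):
            # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
            dx = (self.theta * (self.mu - self.x) * self.dt
                  + self.sigma * np.sqrt(self.dt) * self.rng.standard_normal(self.x.shape))
            self.x = self.x + dx
            return self.x

    noise = OUNoise(dim=1, sigma=0.2)  # sigma mirrors the scheduled std above
    print(np.round(np.concatenate([noise.sample() for _ in range(5)]), 3))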
12 changes: 6 additions & 6 deletions examples/rl/config_overrides/cartpole/ppo_cartpole.yaml
@@ -1,7 +1,7 @@
algo: ppo
algo_config:
# model args
-hidden_dim: 32
+hidden_dim: 128
activation: 'leaky_relu'
norm_obs: False
norm_reward: False
@@ -20,22 +20,22 @@ algo_config:
# optim args
opt_epochs: 5
mini_batch_size: 128
-actor_lr: 0.0007948148615930024
-critic_lr: 0.007497368468753617
+actor_lr: 0.001
+critic_lr: 0.001
max_grad_norm: 0.5

# runner args
-max_env_steps: 720000
+max_env_steps: 50000
num_workers: 1
rollout_batch_size: 4
rollout_steps: 150
deque_size: 10
eval_batch_size: 10

# misc
-log_interval: 6000
+log_interval: 1000
save_interval: 0
num_checkpoints: 0
-eval_interval: 6000
+eval_interval: 1000
eval_save_best: True
tensorboard: False
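The PPO changes above just swap HPO-tuned values for round numbers; the file itself is an ordinary YAML override. A generic sketch of loading such an override and layering it over defaults (this is not ConfigFactory's actual merge logic, and the default values here are made up):

    import yaml

    defaults = {'algo': 'ppo',
                'algo_config': {'hidden_dim': 64, 'actor_lr': 3e-4, 'gamma': 0.99}}

    # Inlined flow-style YAML so the example is self-contained; normally you would
    # read a file such as examples/rl/config_overrides/cartpole/ppo_cartpole.yaml.
    override = yaml.safe_load('{algo: ppo, algo_config: {hidden_dim: 128, actor_lr: 0.001}}')

    def deep_merge(base, extra):
        # Recursively overlay `extra` onto `base`: dicts merge, scalars replace.
        merged = dict(base)
        for key, value in extra.items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = deep_merge(merged[key], value)
            else:
                merged[key] = value
        return merged

    print(deep_merge(defaults, override))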
16 changes: 8 additions & 8 deletions examples/rl/config_overrides/cartpole/sac_cartpole.yaml
@@ -1,29 +1,29 @@
algo: sac
algo_config:
# model args
-hidden_dim: 256
+hidden_dim: 128
activation: 'relu'

# loss args
gamma: 0.98
-tau: 0.12145208815621376
+tau: 0.01
init_temperature: 0.2
use_entropy_tuning: False
target_entropy: null

# optim args
train_interval: 100
train_batch_size: 512
-actor_lr: 0.00045196308120485273
-critic_lr: 0.022547326782152065
-entropy_lr: 0.001
+actor_lr: 0.003
+critic_lr: 0.003
+entropy_lr: 0.003

# runner args
-max_env_steps: 50000
-warm_up_steps: 100
+max_env_steps: 10000
+warm_up_steps: 50
rollout_batch_size: 4
num_workers: 1
-max_buffer_size: 50000
+max_buffer_size: 10000
deque_size: 10
eval_batch_size: 10

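The SAC override replaces the HPO-found tau (~0.12) with a conventional 0.01. tau is the coefficient of the Polyak (soft) target-network update; here is a minimal generic sketch of that rule, using numpy arrays as stand-in parameters (not the repo's implementation):

    import numpy as np

    def soft_update(target_params, online_params, tau=0.01):
        # Polyak averaging: target <- tau * online + (1 - tau) * target.
        return [(1.0 - tau) * t + tau * p for t, p in zip(target_params, online_params)]

    online = [np.ones((2, 2))]   # toy "network" with a single weight matrix
    target = [np.zeros((2, 2))]
    for _ in range(3):
        target = soft_update(target, online, tau=0.01)
    print(target[0])  # creeps toward the online weights at a rate set by tau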
12 changes: 6 additions & 6 deletions examples/rl/config_overrides/cartpole/td3_cartpole.yaml
@@ -1,7 +1,7 @@
algo: td3
algo_config:
# model args
-hidden_dim: 256
+hidden_dim: 128
activation: 'relu'

# loss args
@@ -15,18 +15,18 @@ algo_config:
critic_lr: 0.003

# runner args
-max_env_steps: 50000
-warm_up_steps: 100
+max_env_steps: 10000
+warm_up_steps: 50
rollout_batch_size: 4
num_workers: 1
-max_buffer_size: 50000
+max_buffer_size: 10000
deque_size: 10
eval_batch_size: 10

# misc
-log_interval: 2000
+log_interval: 500
save_interval: 0
num_checkpoints: 0
-eval_interval: 2000
+eval_interval: 500
eval_save_best: True
tensorboard: False
@@ -89,5 +89,5 @@ task_config:
lower_bounds:
- 0.06
- 0.06
-done_on_out_of_bound: True
+done_on_out_of_bound: False
done_on_violation: False
29 changes: 29 additions & 0 deletions examples/rl/config_overrides/quadrotor_2D/td3_quadrotor_2D.yaml
@@ -0,0 +1,29 @@
algo: sac
algo_config:
# model args
hidden_dim: 128
activation: "relu"
use_entropy_tuning: False

# optim args
train_interval: 100
train_batch_size: 256
actor_lr: 0.001
critic_lr: 0.001

# runner args
max_env_steps: 200000
warm_up_steps: 1000
rollout_batch_size: 4
num_workers: 1
max_buffer_size: 1000000
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 4000
save_interval: 0
num_checkpoints: 0
eval_interval: 4000
eval_save_best: True
tensorboard: False
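This new quadrotor override (note the file as committed sets algo: sac) is mostly replay-buffer bookkeeping: max_buffer_size, warm_up_steps, and train_batch_size. A bare-bones sketch of what those knobs control, illustrative only and not the repo's buffer class:

    import random
    from collections import deque

    class ReplayBuffer:
        """FIFO transition store; the oldest entries drop out once max_size is reached."""

        def __init__(self, max_size=1_000_000):      # max_buffer_size
            self.storage = deque(maxlen=max_size)

        def add(self, transition):
            self.storage.append(transition)

        def sample(self, batch_size=256):            # train_batch_size
            return random.sample(self.storage, batch_size)

    buffer = ReplayBuffer()
    for step in range(2000):                         # stand-in for environment steps
        buffer.add((step, 'obs', 'act', 'rew', 'next_obs'))
    if len(buffer.storage) >= 1000:                  # warm_up_steps: only train after warm-up
        print(len(buffer.sample(batch_size=256)), 'transitions sampled')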