From 003d09b1a4be32332d158c03ddc94e01dcb7f085 Mon Sep 17 00:00:00 2001 From: Shambhuraj Sawant Date: Fri, 14 Jun 2024 11:32:37 +0200 Subject: [PATCH] small updates to configs --- .gitignore | 2 +- .pre-commit-config.yaml | 2 +- .../quadrotor_2D/gp_mpc_quadrotor_2D_150.yaml | 2 +- .../quadrotor_2D/quadrotor_2D_track.yaml | 2 +- .../gp_mpc_quadrotor_2D_attitude_150.yaml | 2 +- .../quadrotor_2D_attitude_track.yaml | 2 +- examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh | 2 +- examples/hpo/gp_mpc/gp_mpc_hpo.sh | 4 +- examples/hpo/gp_mpc/main.sh | 2 +- examples/hpo/hpo_experiment.py | 10 +- examples/hpo/rl/main.sh | 2 +- examples/hpo/rl/quadrotor_2D.sh | 2 +- examples/hpo/rl/rl_hp_evaluation.sh | 2 +- examples/hpo/rl/rl_hpo.sh | 4 +- examples/pid/pid_experiment.py | 1 - .../cartpole/cartpole_stab.yaml | 6 +- .../cartpole/ddpg_cartpole.yaml | 39 + .../cartpole/ppo_cartpole.yaml | 12 +- .../cartpole/sac_cartpole.yaml | 16 +- .../cartpole/td3_cartpole.yaml | 12 +- .../quadrotor_2D/quadrotor_2D_track.yaml | 2 +- .../quadrotor_2D/td3_quadrotor_2D.yaml | 29 + examples/rl/data_analysis.ipynb | 1152 +++++++++-------- examples/rl/train_rl_model.sh | 22 +- .../cartpole/qlearning_mpc_cartpole.yaml | 1 - examples/rlmpc/rlmpc_experiment.py | 1 - examples/rlmpc/rlmpc_experiment.sh | 2 +- requirements.txt | 20 + safe_control_gym/controllers/__init__.py | 21 +- safe_control_gym/controllers/ddpg/ddpg.py | 28 +- .../controllers/ddpg/ddpg_utils.py | 30 +- safe_control_gym/controllers/lqr/lqr_utils.py | 2 +- safe_control_gym/controllers/mpc/gp_mpc.py | 36 +- safe_control_gym/controllers/mpc/gp_utils.py | 73 +- .../controllers/mpc/gpmpc_acados.py | 183 ++- safe_control_gym/controllers/mpc/mpc.py | 42 +- .../controllers/mpc/mpc_acados.py | 112 +- .../controllers/mpc/sqp_gp_mpc.py | 141 +- safe_control_gym/controllers/mpc/sqp_mpc.py | 52 +- .../controllers/mpc/sqp_mpc_utils.py | 5 +- safe_control_gym/controllers/pid/pid.py | 4 +- safe_control_gym/controllers/ppo/ppo.py | 1 + 
safe_control_gym/controllers/sac/sac.py | 2 + safe_control_gym/controllers/sac/sac_utils.py | 10 +- safe_control_gym/controllers/td3/td3.yaml | 4 - safe_control_gym/envs/__init__.py | 4 +- safe_control_gym/envs/benchmark_env.py | 6 +- safe_control_gym/envs/constraints.py | 6 +- safe_control_gym/envs/gym_control/cartpole.py | 10 +- .../envs/gym_pendulum/pendulum.py | 26 +- .../envs/gym_pybullet_drones/quadrotor.py | 47 +- .../gym_pybullet_drones/quadrotor_utils.py | 110 +- .../experiments/base_experiment.py | 4 +- safe_control_gym/lyapunov/lyapunov.py | 1132 ---------------- safe_control_gym/lyapunov/utilities.py | 722 ----------- tests/test_hpo/test_train.py | 5 +- 56 files changed, 1222 insertions(+), 2951 deletions(-) create mode 100644 examples/rl/config_overrides/cartpole/ddpg_cartpole.yaml create mode 100644 examples/rl/config_overrides/quadrotor_2D/td3_quadrotor_2D.yaml create mode 100644 requirements.txt delete mode 100644 safe_control_gym/lyapunov/lyapunov.py delete mode 100644 safe_control_gym/lyapunov/utilities.py diff --git a/.gitignore b/.gitignore index 145bddd88..a35d52c42 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,7 @@ examples/pid/*data/ results/ z_docstring.py TODOs.md -# +# hpo_study*/ hp_study*/ comparisons/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 13075c111..15bc82105 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,7 +9,7 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-ast - id: check-yaml diff --git a/examples/hpo/gp_mpc/config_overrides/quadrotor_2D/gp_mpc_quadrotor_2D_150.yaml b/examples/hpo/gp_mpc/config_overrides/quadrotor_2D/gp_mpc_quadrotor_2D_150.yaml index abe667855..7c3748b2b 100644 --- a/examples/hpo/gp_mpc/config_overrides/quadrotor_2D/gp_mpc_quadrotor_2D_150.yaml +++ b/examples/hpo/gp_mpc/config_overrides/quadrotor_2D/gp_mpc_quadrotor_2D_150.yaml @@ -48,4 +48,4 @@ rand_data_selection: false 
terminate_train_on_done: True terminate_test_on_done: True - parallel: True \ No newline at end of file + parallel: True diff --git a/examples/hpo/gp_mpc/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml b/examples/hpo/gp_mpc/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml index 4827241f5..a777063d5 100644 --- a/examples/hpo/gp_mpc/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml +++ b/examples/hpo/gp_mpc/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml @@ -70,4 +70,4 @@ task_config: init_z_dot: 0.0 init_theta: 0.0 init_theta_dot: 0.0 - verbose: false \ No newline at end of file + verbose: false diff --git a/examples/hpo/gp_mpc/config_overrides/quadrotor_2D_attitude/gp_mpc_quadrotor_2D_attitude_150.yaml b/examples/hpo/gp_mpc/config_overrides/quadrotor_2D_attitude/gp_mpc_quadrotor_2D_attitude_150.yaml index 20d4f3268..a1a89cc0f 100644 --- a/examples/hpo/gp_mpc/config_overrides/quadrotor_2D_attitude/gp_mpc_quadrotor_2D_attitude_150.yaml +++ b/examples/hpo/gp_mpc/config_overrides/quadrotor_2D_attitude/gp_mpc_quadrotor_2D_attitude_150.yaml @@ -48,4 +48,4 @@ rand_data_selection: false terminate_train_on_done: True terminate_test_on_done: True - parallel: True \ No newline at end of file + parallel: True diff --git a/examples/hpo/gp_mpc/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/hpo/gp_mpc/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml index ae3ca80de..e1561e2d0 100644 --- a/examples/hpo/gp_mpc/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml +++ b/examples/hpo/gp_mpc/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml @@ -63,4 +63,4 @@ task_config: init_z: 1 init_z_dot: 0.0 init_theta: 0.0 - verbose: false \ No newline at end of file + verbose: false diff --git a/examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh b/examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh index b67cffc66..cacd80dd0 100644 --- a/examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh +++ 
b/examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh @@ -45,7 +45,7 @@ done done # 20 training unseen seeds that are unseen during hpo (hpo only saw seeds in [0, 10000]) -seeds=(22403 84244 98825 40417 58454 47838 56715 77833 19880 59009 +seeds=(22403 84244 98825 40417 58454 47838 56715 77833 19880 59009 47722 81354 63825 13296 10779 98122 86221 89144 35192 24759) for seed in "${seeds[@]}"; do diff --git a/examples/hpo/gp_mpc/gp_mpc_hpo.sh b/examples/hpo/gp_mpc/gp_mpc_hpo.sh index de8541b87..951c664ee 100644 --- a/examples/hpo/gp_mpc/gp_mpc_hpo.sh +++ b/examples/hpo/gp_mpc/gp_mpc_hpo.sh @@ -6,7 +6,7 @@ # 2. Remove or backup the database if needed. # 3. Create a screen session `screen`, and detach it `Ctrl+a d`. # 4. Run this script by giving experiment name as the first arg. and the seed as the second. -# 5. If you want to kill them, run `pkill -f "python ./experiments/comparisons/gpmpc/gpmpc_experiment.py"`. +# 5. If you want to kill them, run `pkill -f "python ./experiments/comparisons/gpmpc/gpmpc_experiment.py"`. 
##################### cd ~/safe-control-gym @@ -117,4 +117,4 @@ echo "backing up the database" mysqldump --no-tablespaces -u optuna gp_mpc_hpo > gp_mpc_hpo.sql mv gp_mpc_hpo.sql ./examples/hpo/gp_mpc/hpo_study_${sampler}_${sys}/run${experiment_name}/gp_mpc_hpo.sql # remove the database -python ./safe_control_gym/hyperparameters/database.py --func drop --tag gp_mpc_hpo \ No newline at end of file +python ./safe_control_gym/hyperparameters/database.py --func drop --tag gp_mpc_hpo diff --git a/examples/hpo/gp_mpc/main.sh b/examples/hpo/gp_mpc/main.sh index c04997bcf..632454d29 100644 --- a/examples/hpo/gp_mpc/main.sh +++ b/examples/hpo/gp_mpc/main.sh @@ -18,4 +18,4 @@ bash examples/hpo/gp_mpc/gp_mpc_hpo.sh ${run} $((run)) ${sampler} ${localOrHost} done # TODO: eval -bash examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh ${localOrHost} ${sys} ${task} ${sampler} \ No newline at end of file +bash examples/hpo/gp_mpc/gp_mpc_hp_evaluation.sh ${localOrHost} ${sys} ${task} ${sampler} diff --git a/examples/hpo/hpo_experiment.py b/examples/hpo/hpo_experiment.py index 544c41d79..07032ea1d 100644 --- a/examples/hpo/hpo_experiment.py +++ b/examples/hpo/hpo_experiment.py @@ -4,15 +4,13 @@ import os from functools import partial -import yaml - import matplotlib.pyplot as plt import numpy as np +import yaml from safe_control_gym.envs.benchmark_env import Environment, Task - -from safe_control_gym.hyperparameters.hpo import HPO from safe_control_gym.experiments.base_experiment import BaseExperiment +from safe_control_gym.hyperparameters.hpo import HPO from safe_control_gym.utils.configuration import ConfigFactory from safe_control_gym.utils.registration import make from safe_control_gym.utils.utils import set_device_from_config, set_dir_from_config, set_seed_from_config @@ -123,7 +121,7 @@ def train(config): graph1_2 = 9 graph3_1 = 0 graph3_2 = 4 - + if config.task_config.quad_type != 4: _, ax = plt.subplots() ax.plot(results['obs'][0][:, graph1_1], results['obs'][0][:, graph1_2], 'r--', 
label='Agent Trajectory') @@ -176,7 +174,7 @@ def train(config): with open(os.path.join(config.output_dir, 'metrics.pkl'), 'wb') as f: import pickle pickle.dump(metrics, f) - + return eval_env.X_GOAL, results, metrics diff --git a/examples/hpo/rl/main.sh b/examples/hpo/rl/main.sh index f286da8cd..4a58c5cbe 100644 --- a/examples/hpo/rl/main.sh +++ b/examples/hpo/rl/main.sh @@ -19,4 +19,4 @@ bash examples/hpo/rl/rl_hpo.sh ${run} $((run+6)) ${sampler} ${localOrHost} ${sys done # eval -bash examples/hpo/rl/rl_hp_evaluation.sh ${localOrHost} ${algo} ${sys} ${task} ${sampler} \ No newline at end of file +bash examples/hpo/rl/rl_hp_evaluation.sh ${localOrHost} ${algo} ${sys} ${task} ${sampler} diff --git a/examples/hpo/rl/quadrotor_2D.sh b/examples/hpo/rl/quadrotor_2D.sh index 16ab117e7..836b35172 100644 --- a/examples/hpo/rl/quadrotor_2D.sh +++ b/examples/hpo/rl/quadrotor_2D.sh @@ -30,4 +30,4 @@ python ./examples/hpo/hpo_experiment.py \ --task quadrotor \ --overrides ./examples/hpo/rl/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml \ ./examples/hpo/rl/sac/config_overrides/quadrotor_2D/sac_quadrotor_2D_.yaml \ ---output_dir ./examples/hpo/results --n_episodes 10 --tag 2D --opt_hps '' --seed 6 --use_gpu True \ No newline at end of file +--output_dir ./examples/hpo/results --n_episodes 10 --tag 2D --opt_hps '' --seed 6 --use_gpu True diff --git a/examples/hpo/rl/rl_hp_evaluation.sh b/examples/hpo/rl/rl_hp_evaluation.sh index f823322ea..75ea8f6ff 100644 --- a/examples/hpo/rl/rl_hp_evaluation.sh +++ b/examples/hpo/rl/rl_hp_evaluation.sh @@ -45,7 +45,7 @@ done done # 20 training unseen seeds that are unseen during hpo (hpo only saw seeds in [0, 10000]) -seeds=(22403 84244 98825 40417 58454 47838 56715 77833 19880 59009 +seeds=(22403 84244 98825 40417 58454 47838 56715 77833 19880 59009 47722 81354 63825 13296 10779 98122 86221 89144 35192 24759) for seed in "${seeds[@]}"; do diff --git a/examples/hpo/rl/rl_hpo.sh b/examples/hpo/rl/rl_hpo.sh index c944d416d..ce5465a18 
100644 --- a/examples/hpo/rl/rl_hpo.sh +++ b/examples/hpo/rl/rl_hpo.sh @@ -6,7 +6,7 @@ # 2. Remove or backup the database if needed. # 3. Create a screen session `screen`, and detach it `Ctrl+a d`. # 4. Run this script by giving experiment name as the first arg. and the seed as the second. -# 5. If you want to kill them, run `pkill -f "python ./experiments/comparisons/gpmpc/gpmpc_experiment.py"`. +# 5. If you want to kill them, run `pkill -f "python ./experiments/comparisons/gpmpc/gpmpc_experiment.py"`. ##################### cd ~/safe-control-gym @@ -118,4 +118,4 @@ echo "backing up the database" mysqldump --no-tablespaces -u optuna ${algo}_hpo > ${algo}_hpo.sql mv ${algo}_hpo.sql ./examples/hpo/rl/${algo}/hpo_study_${sampler}_${sys}/run${experiment_name}/${algo}_hpo.sql # remove the database -python ./safe_control_gym/hyperparameters/database.py --func drop --tag ${algo}_hpo \ No newline at end of file +python ./safe_control_gym/hyperparameters/database.py --func drop --tag ${algo}_hpo diff --git a/examples/pid/pid_experiment.py b/examples/pid/pid_experiment.py index 7316c21eb..ed5dee36a 100644 --- a/examples/pid/pid_experiment.py +++ b/examples/pid/pid_experiment.py @@ -9,7 +9,6 @@ import pybullet as p from safe_control_gym.envs.benchmark_env import Environment, Task - from safe_control_gym.experiments.base_experiment import BaseExperiment from safe_control_gym.utils.configuration import ConfigFactory from safe_control_gym.utils.registration import make diff --git a/examples/rl/config_overrides/cartpole/cartpole_stab.yaml b/examples/rl/config_overrides/cartpole/cartpole_stab.yaml index 7ee47f77a..fad908d49 100644 --- a/examples/rl/config_overrides/cartpole/cartpole_stab.yaml +++ b/examples/rl/config_overrides/cartpole/cartpole_stab.yaml @@ -50,7 +50,7 @@ task_config: # RL Reward rew_state_weight: [1, 0.1, 1, 0.1] rew_act_weight: 0.1 - rew_exponential: True + rew_exponential: False # Disturbances disturbances: @@ -62,8 +62,8 @@ task_config: constraints: - 
constraint_form: default_constraint constrained_variable: state - upper_bounds: [10, 10, 0.2, 0.2] - lower_bounds: [-10, -10, -0.2, -0.2] + upper_bounds: [5.0, 10.0, 0.5, 2.0] + lower_bounds: [-5.0, -10.0, -0.5, -2.0] - constraint_form: default_constraint constrained_variable: input upper_bounds: [10] diff --git a/examples/rl/config_overrides/cartpole/ddpg_cartpole.yaml b/examples/rl/config_overrides/cartpole/ddpg_cartpole.yaml new file mode 100644 index 000000000..8550c6564 --- /dev/null +++ b/examples/rl/config_overrides/cartpole/ddpg_cartpole.yaml @@ -0,0 +1,39 @@ +algo: ddpg +algo_config: + # model args + hidden_dim: 128 + activation: 'relu' + + # loss args + gamma: 0.98 + tau: 0.01 + + # noise args + random_process: + func: OrnsteinUhlenbeckProcess + std: + func: LinearSchedule + args: 0.2 + + # optim args + train_interval: 100 + train_batch_size: 64 + actor_lr: 0.003 + critic_lr: 0.003 + + # runner args + max_env_steps: 10000 + warm_up_steps: 50 + rollout_batch_size: 4 + num_workers: 1 + max_buffer_size: 10000 + deque_size: 10 + eval_batch_size: 10 + + # misc + log_interval: 500 + save_interval: 0 + num_checkpoints: 0 + eval_interval: 500 + eval_save_best: False + tensorboard: False diff --git a/examples/rl/config_overrides/cartpole/ppo_cartpole.yaml b/examples/rl/config_overrides/cartpole/ppo_cartpole.yaml index d3d7cdaa0..36a87e58b 100644 --- a/examples/rl/config_overrides/cartpole/ppo_cartpole.yaml +++ b/examples/rl/config_overrides/cartpole/ppo_cartpole.yaml @@ -1,7 +1,7 @@ algo: ppo algo_config: # model args - hidden_dim: 32 + hidden_dim: 128 activation: 'leaky_relu' norm_obs: False norm_reward: False @@ -20,12 +20,12 @@ algo_config: # optim args opt_epochs: 5 mini_batch_size: 128 - actor_lr: 0.0007948148615930024 - critic_lr: 0.007497368468753617 + actor_lr: 0.001 + critic_lr: 0.001 max_grad_norm: 0.5 # runner args - max_env_steps: 720000 + max_env_steps: 50000 num_workers: 1 rollout_batch_size: 4 rollout_steps: 150 @@ -33,9 +33,9 @@ algo_config: 
eval_batch_size: 10 # misc - log_interval: 6000 + log_interval: 1000 save_interval: 0 num_checkpoints: 0 - eval_interval: 6000 + eval_interval: 1000 eval_save_best: True tensorboard: False diff --git a/examples/rl/config_overrides/cartpole/sac_cartpole.yaml b/examples/rl/config_overrides/cartpole/sac_cartpole.yaml index d9201b492..5e1222631 100644 --- a/examples/rl/config_overrides/cartpole/sac_cartpole.yaml +++ b/examples/rl/config_overrides/cartpole/sac_cartpole.yaml @@ -1,12 +1,12 @@ algo: sac algo_config: # model args - hidden_dim: 256 + hidden_dim: 128 activation: 'relu' # loss args gamma: 0.98 - tau: 0.12145208815621376 + tau: 0.01 init_temperature: 0.2 use_entropy_tuning: False target_entropy: null @@ -14,16 +14,16 @@ algo_config: # optim args train_interval: 100 train_batch_size: 512 - actor_lr: 0.00045196308120485273 - critic_lr: 0.022547326782152065 - entropy_lr: 0.001 + actor_lr: 0.003 + critic_lr: 0.003 + entropy_lr: 0.003 # runner args - max_env_steps: 50000 - warm_up_steps: 100 + max_env_steps: 10000 + warm_up_steps: 50 rollout_batch_size: 4 num_workers: 1 - max_buffer_size: 50000 + max_buffer_size: 10000 deque_size: 10 eval_batch_size: 10 diff --git a/examples/rl/config_overrides/cartpole/td3_cartpole.yaml b/examples/rl/config_overrides/cartpole/td3_cartpole.yaml index e5a13ba98..f242670a3 100644 --- a/examples/rl/config_overrides/cartpole/td3_cartpole.yaml +++ b/examples/rl/config_overrides/cartpole/td3_cartpole.yaml @@ -1,7 +1,7 @@ algo: td3 algo_config: # model args - hidden_dim: 256 + hidden_dim: 128 activation: 'relu' # loss args @@ -15,18 +15,18 @@ algo_config: critic_lr: 0.003 # runner args - max_env_steps: 50000 - warm_up_steps: 100 + max_env_steps: 10000 + warm_up_steps: 50 rollout_batch_size: 4 num_workers: 1 - max_buffer_size: 50000 + max_buffer_size: 10000 deque_size: 10 eval_batch_size: 10 # misc - log_interval: 2000 + log_interval: 500 save_interval: 0 num_checkpoints: 0 - eval_interval: 2000 + eval_interval: 500 eval_save_best: True 
tensorboard: False diff --git a/examples/rl/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml b/examples/rl/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml index 2aa366fb5..85821ff16 100644 --- a/examples/rl/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml +++ b/examples/rl/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml @@ -89,5 +89,5 @@ task_config: lower_bounds: - 0.06 - 0.06 - done_on_out_of_bound: True + done_on_out_of_bound: False done_on_violation: False diff --git a/examples/rl/config_overrides/quadrotor_2D/td3_quadrotor_2D.yaml b/examples/rl/config_overrides/quadrotor_2D/td3_quadrotor_2D.yaml new file mode 100644 index 000000000..970b9d7a9 --- /dev/null +++ b/examples/rl/config_overrides/quadrotor_2D/td3_quadrotor_2D.yaml @@ -0,0 +1,29 @@ +algo: td3 +algo_config: + # model args + hidden_dim: 128 + activation: "relu" + use_entropy_tuning: False + + # optim args + train_interval: 100 + train_batch_size: 256 + actor_lr: 0.001 + critic_lr: 0.001 + + # runner args + max_env_steps: 200000 + warm_up_steps: 1000 + rollout_batch_size: 4 + num_workers: 1 + max_buffer_size: 1000000 + deque_size: 10 + eval_batch_size: 10 + + # misc + log_interval: 4000 + save_interval: 0 + num_checkpoints: 0 + eval_interval: 4000 + eval_save_best: True + tensorboard: False diff --git a/examples/rl/data_analysis.ipynb b/examples/rl/data_analysis.ipynb index 57221c850..a82941cbe 100644 --- a/examples/rl/data_analysis.ipynb +++ b/examples/rl/data_analysis.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -22,20 +22,22 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# data_paths = {\"ppo\": os.getcwd()+\"/ppo_data/\",\n", "# \"sac\": os.getcwd()+\"/sac_data/\"}\n", - "data_paths = {\"ppo\": os.getcwd()+\"/ppo_data_3/\", \n", - " \"sac\": os.getcwd()+\"/sac_data_3/\"}\n", - "seeds = [i for i in range(0,5)]\n" 
+ "data_paths = {\"ppo\": os.getcwd()+\"/Results/cartpole_ppo_data/\", \n", + " \"sac\": os.getcwd()+\"/Results/cartpole_sac_data/\", \n", + " \"td3\": os.getcwd()+\"/Results/cartpole_td3_data/\", \n", + " \"ddpg\": os.getcwd()+\"/Results/cartpole_ddpg_data/\"}\n", + "seeds = [i for i in range(0,10)]\n" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -69,7 +71,9 @@ "output_type": "stream", "text": [ "ppo\n", - "sac\n" + "sac\n", + "td3\n", + "ddpg\n" ] } ], @@ -80,558 +84,565 @@ " perf_data.update({method: {}})\n", " for seed in seeds:\n", " xk, x, yk, y = load_from_log_file(data_paths[method] + str(seed) + \"/logs/stat_eval/ep_return.log\")\n", - " # xk, x, zk, z = load_from_log_file(data_paths[method] + str(seed) + \"/logs/stat_eval/ep_return_std.log\")\n", + " xk, x, zk, z = load_from_log_file(data_paths[method] + str(seed) + \"/logs/stat_eval/ep_return_std.log\")\n", " xk, x, ck, c = load_from_log_file(data_paths[method] + str(seed) + \"/logs/stat_eval/constraint_violation.log\")\n", - " perf_data[method].update({seed: {\"x\": x, \"y\": y, \"c\": c}})\n", - " # perf_data[method].update({seed: {\"x\": x, \"y\": y, \"z\": z, \"c\": c}})" + " # perf_data[method].update({seed: {\"x\": x, \"y\": y, \"c\": c}})\n", + " perf_data[method].update({seed: {\"x\": x, \"y\": y, \"z\": z, \"c\": c}})" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'ppo': {0: {'x': array([ 6000., 12000., 18000., 24000., 30000., 36000., 42000.,\n", - " 48000., 54000., 60000., 66000., 72000., 78000., 84000.,\n", - " 90000., 96000., 102000., 108000., 114000., 120000., 126000.,\n", - " 132000., 138000., 144000., 150000., 156000., 162000., 168000.,\n", - " 174000., 180000., 186000., 192000., 198000., 204000., 
210000.,\n", - " 216000., 222000., 228000., 234000., 240000., 246000., 252000.,\n", - " 258000., 264000., 270000., 276000., 282000., 288000., 294000.,\n", - " 300000., 306000., 312000., 318000., 324000., 330000., 336000.,\n", - " 342000., 348000., 354000., 360000., 366000., 372000., 378000.,\n", - " 384000., 390000., 396000., 402000., 408000., 414000., 420000.,\n", - " 426000., 432000., 438000., 444000., 450000., 456000., 462000.,\n", - " 468000., 474000., 480000., 486000., 492000., 498000., 504000.,\n", - " 510000., 516000., 522000., 528000., 534000., 540000., 546000.,\n", - " 552000., 558000., 564000., 570000., 576000., 582000., 588000.,\n", - " 594000., 600000., 606000., 612000., 618000., 624000., 630000.,\n", - " 636000., 642000., 648000., 654000., 660000., 666000., 672000.,\n", - " 678000., 684000., 690000., 696000., 702000., 708000., 714000.,\n", - " 720000.]),\n", - " 'y': array([-3.45498635e+03, -3.48254554e+03, -1.69217232e+03, -1.08838016e+03,\n", - " -9.39585496e+02, -6.72625696e+02, -5.56809882e+02, -5.39169950e+02,\n", - " -5.16726576e+02, -4.48014944e+02, -4.52004794e+02, -4.79347985e+02,\n", - " -4.59744974e+02, -4.56071579e+02, -4.51917542e+02, -4.49418410e+02,\n", - " -4.26013786e+02, -4.25260157e+02, -4.26273925e+02, -4.20489256e+02,\n", - " -4.12712053e+02, -4.11918704e+02, -4.07599886e+02, -4.11589700e+02,\n", - " -4.16310611e+02, -4.22223582e+02, -4.02759465e+02, -3.90424556e+02,\n", - " -4.16063878e+02, -4.03101372e+02, -3.92953130e+02, -4.07499726e+02,\n", - " -4.13173671e+02, -3.84699116e+02, -3.89044258e+02, -3.91292563e+02,\n", - " -3.79954833e+02, -3.58240788e+02, -3.80524156e+02, -3.86084518e+02,\n", - " -3.77026946e+02, -3.72266822e+02, -3.76361099e+02, -3.77472582e+02,\n", - " -3.58353346e+02, -3.70557575e+02, -3.76114750e+02, -3.87815346e+02,\n", - " -3.69684531e+02, -3.62254136e+02, -3.65242684e+02, -3.60853945e+02,\n", - " -3.61192229e+02, -3.53211188e+02, -3.42543957e+02, -3.41361376e+02,\n", - " -3.20797985e+02, -3.39806086e+02, 
-3.50114993e+02, -3.23611739e+02,\n", - " -3.39287357e+02, -3.07193556e+02, -3.08879218e+02, -3.08418859e+02,\n", - " -2.81071002e+02, -3.17164044e+02, -3.12163944e+02, -2.98741055e+02,\n", - " -2.53012678e+02, -2.65612249e+02, -2.22334254e+02, -1.93236216e+02,\n", - " -7.58022565e+01, -4.01795799e+01, -1.05472220e+01, -5.11317381e+01,\n", - " -1.20507554e+01, -5.42344219e+00, -6.86605157e+00, -1.01770392e+01,\n", - " -7.26960760e+00, -1.26251993e+01, -1.19577866e+01, -6.93906030e+00,\n", - " -1.03768145e+01, -6.33331620e+00, -7.71478413e+00, -6.77252206e+00,\n", - " -7.38488240e+00, -7.10425718e+00, -7.47550590e+00, -6.63635211e+00,\n", - " -7.43633755e+00, -7.17921542e+00, -5.77101839e+00, -5.14521805e+00,\n", - " -5.34710781e+00, -5.92730431e+00, -4.65211391e+00, -5.97789747e+00,\n", - " -6.08508513e+00, -5.36868486e+00, -4.23034163e+00, -4.90287717e+00,\n", - " -4.63733288e+00, -5.97522586e+00, -4.08198678e+00, -4.27246695e+00,\n", - " -4.41333018e+00, -4.11989374e+00, -4.32460065e+00, -3.67991151e+00,\n", - " -4.41596619e+00, -4.03332029e+00, -3.45852920e+00, -3.29081237e+00,\n", - " -3.84996114e+00, -3.35997062e+00, -3.22459115e+00, -2.82396909e+00]),\n", - " 'c': array([73.9, 74. , 72.8, 70.1, 69.9, 69.7, 70.1, 72.5, 73.1, 73.8, 74.6,\n", - " 75. , 74.8, 74.7, 74.8, 74.9, 74.5, 73.8, 74.8, 74.8, 74. , 73.3,\n", - " 74.7, 74.8, 74.4, 75. , 74.1, 73.6, 74.4, 73.9, 72.5, 75. , 73.2,\n", - " 73.3, 72.5, 74.4, 74. , 67.9, 73.1, 71.7, 72.4, 70.8, 74.2, 74.3,\n", - " 74.7, 74.6, 75. , 75. , 71.4, 73.7, 73.7, 74.7, 75. , 71.3, 68. ,\n", - " 66.3, 63.1, 67. , 67.5, 61.6, 62.3, 58.5, 57.9, 57.1, 52.7, 57.9,\n", - " 55.6, 52.8, 47.5, 48.2, 43.9, 39.5, 18.7, 14. , 1.5, 6.2, 0. ,\n", - " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. , 0. , 0.6, 0.7, 0. , 0.2, 0. ,\n", - " 0. , 0. , 0.5, 0. , 0. , 0.2, 0.8, 0. , 0.8, 0.3, 0. ,\n", - " 1. , 0.8, 0.2, 0. , 0. , 0. 
, 0.3, 0.1, 0.8, 0.3])},\n", - " 1: {'x': array([ 6000., 12000., 18000., 24000., 30000., 36000., 42000.,\n", - " 48000., 54000., 60000., 66000., 72000., 78000., 84000.,\n", - " 90000., 96000., 102000., 108000., 114000., 120000., 126000.,\n", - " 132000., 138000., 144000., 150000., 156000., 162000., 168000.,\n", - " 174000., 180000., 186000., 192000., 198000., 204000., 210000.,\n", - " 216000., 222000., 228000., 234000., 240000., 246000., 252000.,\n", - " 258000., 264000., 270000., 276000., 282000., 288000., 294000.,\n", - " 300000., 306000., 312000., 318000., 324000., 330000., 336000.,\n", - " 342000., 348000., 354000., 360000., 366000., 372000., 378000.,\n", - " 384000., 390000., 396000., 402000., 408000., 414000., 420000.,\n", - " 426000., 432000., 438000., 444000., 450000., 456000., 462000.,\n", - " 468000., 474000., 480000., 486000., 492000., 498000., 504000.,\n", - " 510000., 516000., 522000., 528000., 534000., 540000., 546000.,\n", - " 552000., 558000., 564000., 570000., 576000., 582000., 588000.,\n", - " 594000., 600000., 606000., 612000., 618000., 624000., 630000.,\n", - " 636000., 642000., 648000., 654000., 660000., 666000., 672000.,\n", - " 678000., 684000., 690000., 696000., 702000., 708000., 714000.,\n", - " 720000.]),\n", - " 'y': array([-2081.1920889 , -767.02743925, -664.38848272, -556.69336949,\n", - " -579.07925006, -551.19637447, -652.26681045, -662.51038193,\n", - " -478.91299177, -502.94201174, -469.50664575, -492.12584815,\n", - " -484.18153129, -468.52696592, -487.01721154, -514.17484799,\n", - " -473.0710636 , -471.62070198, -426.72455126, -421.75646457,\n", - " -409.64367927, -406.27356257, -418.10605297, -420.78321586,\n", - " -407.21932793, -408.94683318, -416.12102703, -410.68354116,\n", - " -389.23455677, -388.51677625, -384.8237186 , -387.13882778,\n", - " -392.56779655, -374.23316825, -377.61513994, -455.0377581 ,\n", - " -378.99716513, -366.55528553, -369.84259985, -369.63554248,\n", - " -373.86415388, -366.92398869, -363.85408277, 
-353.31190403,\n", - " -362.35191491, -363.00179269, -367.04453729, -445.82299096,\n", - " -384.75088711, -387.41522854, -413.42176036, -347.77848016,\n", - " -411.0996394 , -367.06053602, -381.71598643, -347.70598292,\n", - " -368.8461308 , -375.16817515, -372.16689181, -362.33045054,\n", - " -355.06020438, -363.82624938, -436.78482351, -355.47535361,\n", - " -356.20137162, -340.27150483, -356.8154747 , -355.86750986,\n", - " -357.28509399, -357.81957067, -355.49893022, -345.97537624,\n", - " -348.30000585, -432.94481922, -339.59353861, -355.08261821,\n", - " -348.65390885, -344.99835651, -356.18870413, -350.81615303,\n", - " -351.97187517, -443.32537408, -355.40992579, -342.09443648,\n", - " -349.17013918, -349.86271621, -351.77586711, -335.98053271,\n", - " -346.19767946, -353.25353305, -359.46394378, -341.46325383,\n", - " -348.82398305, -350.23307841, -355.61201976, -357.28338508,\n", - " -344.11499911, -351.36025522, -348.26705685, -350.7510658 ,\n", - " -367.13279963, -359.86400272, -345.76529293, -359.97636785,\n", - " -601.14395951, -358.85283193, -584.56042471, -345.19924877,\n", - " -354.48505366, -381.35473492, -335.38246286, -350.53859738,\n", - " -337.45949874, -344.60211967, -340.53707695, -331.14450892,\n", - " -344.98154363, -344.24920315, -344.98288129, -343.40420561]),\n", - " 'c': array([71.8, 69.6, 69.5, 68.1, 70.2, 68.7, 71.3, 70.3, 72.5, 74. , 72.9,\n", - " 71. , 74.7, 74.5, 74.8, 75. , 74.8, 75. , 74.8, 75. , 74.7, 74.4,\n", - " 75. , 75. , 75. , 75. , 75. , 75. , 75. , 74.5, 74.9, 75. , 75. ,\n", - " 75. , 73.8, 68. , 74.7, 72. , 74.8, 73.7, 74.9, 73.5, 71.3, 70.5,\n", - " 72.1, 73.3, 74. , 69.1, 74.3, 73.9, 68.8, 67.1, 69.1, 71.8, 73.3,\n", - " 67.1, 74.8, 74.9, 72.3, 72.4, 72. 
, 72.4, 71.8, 72.5, 73.4, 69.2,\n", - " 74.2, 73.8, 73.6, 74.6, 73.1, 71.3, 71.4, 62.2, 69.2, 73.7, 73.3,\n", - " 71.7, 74.1, 71.7, 72.7, 69.9, 73.6, 70.9, 72.7, 72.4, 72.9, 69.5,\n", - " 71.7, 70.5, 72.2, 68.8, 69.7, 71.2, 71.9, 71.9, 69.2, 70.9, 69.2,\n", - " 70.5, 73.7, 72.7, 68.9, 71.7, 66.1, 72.6, 61.2, 69.6, 72.9, 69.2,\n", - " 68.3, 72. , 69.4, 70.6, 71. , 68.3, 72. , 71.7, 71.4, 71.1])},\n", - " 2: {'x': array([ 6000., 12000., 18000., 24000., 30000., 36000., 42000.,\n", - " 48000., 54000., 60000., 66000., 72000., 78000., 84000.,\n", - " 90000., 96000., 102000., 108000., 114000., 120000., 126000.,\n", - " 132000., 138000., 144000., 150000., 156000., 162000., 168000.,\n", - " 174000., 180000., 186000., 192000., 198000., 204000., 210000.,\n", - " 216000., 222000., 228000., 234000., 240000., 246000., 252000.,\n", - " 258000., 264000., 270000., 276000., 282000., 288000., 294000.,\n", - " 300000., 306000., 312000., 318000., 324000., 330000., 336000.,\n", - " 342000., 348000., 354000., 360000., 366000., 372000., 378000.,\n", - " 384000., 390000., 396000., 402000., 408000., 414000., 420000.,\n", - " 426000., 432000., 438000., 444000., 450000., 456000., 462000.,\n", - " 468000., 474000., 480000., 486000., 492000., 498000., 504000.,\n", - " 510000., 516000., 522000., 528000., 534000., 540000., 546000.,\n", - " 552000., 558000., 564000., 570000., 576000., 582000., 588000.,\n", - " 594000., 600000., 606000., 612000., 618000., 624000., 630000.,\n", - " 636000., 642000., 648000., 654000., 660000., 666000., 672000.,\n", - " 678000., 684000., 690000., 696000., 702000., 708000., 714000.,\n", - " 720000.]),\n", - " 'y': array([-1240.60041327, -963.63139598, -933.52679612, -1131.10355204,\n", - " -797.52509207, -662.40552076, -574.98780263, -610.18861149,\n", - " -561.01239901, -538.23793379, -549.24297697, -520.39969498,\n", - " -508.72431756, -538.17116853, -556.86095618, -520.60999772,\n", - " -497.7539633 , -488.56380084, -466.66970327, -419.68478527,\n", - " 
-421.97702842, -418.0305072 , -422.67973353, -428.11335682,\n", - " -426.46057749, -357.53499607, -385.71559751, -368.90374207,\n", - " -371.60802584, -366.10374323, -360.92791998, -336.31133052,\n", - " -357.67273717, -338.80344547, -343.16303846, -326.57097767,\n", - " -321.02295945, -295.81569028, -276.95622882, -277.21966718,\n", - " -228.05912738, -227.91568619, -169.96712274, -90.88897106,\n", - " -123.42966779, -28.46566211, -15.56563772, -7.15888297,\n", - " -36.51003417, -5.74373714, -9.33690049, -5.42814785,\n", - " -11.03163926, -6.60145269, -6.89237933, -5.42834179,\n", - " -5.25545599, -6.90976747, -5.97435857, -5.19635841,\n", - " -6.44141575, -5.79745515, -4.4257237 , -4.08032538,\n", - " -3.99509375, -4.41206908, -8.61599332, -5.03720983,\n", - " -6.67571018, -4.07700952, -3.67254726, -4.86934642,\n", - " -4.9059107 , -3.76598201, -3.33319148, -3.49251006,\n", - " -2.86883772, -3.08877263, -2.55720285, -2.98141752,\n", - " -3.57704538, -2.71985706, -2.67762663, -3.05164586,\n", - " -2.97935712, -2.96721613, -2.60411183, -3.19708231,\n", - " -3.35083527, -2.97248639, -2.59229256, -3.02956229,\n", - " -2.98026883, -2.45492623, -2.6644369 , -2.6021496 ,\n", - " -2.88039605, -3.24528222, -3.21193219, -2.89837046,\n", - " -3.1984164 , -2.94590302, -2.80894181, -3.40852402,\n", - " -3.47659554, -2.97008327, -3.22899519, -2.87973584,\n", - " -2.71630854, -2.73793177, -3.21000082, -2.17388668,\n", - " -2.92101956, -2.94172411, -3.21990413, -2.56106493,\n", - " -2.65046665, -2.31605517, -2.96261584, -3.09033211]),\n", - " 'c': array([68.2, 70.6, 73.8, 74. , 74.3, 74.3, 74.5, 75. , 74.9, 75. , 75. ,\n", - " 75. , 74.9, 75. , 75. , 74.9, 74.2, 74.1, 74. , 72.8, 70.8, 69. ,\n", - " 71.7, 69.5, 70.5, 63.6, 66.6, 66.1, 67.4, 64.6, 68.1, 64.4, 67.9,\n", - " 63. , 63.4, 61.9, 60.8, 51.7, 50.9, 45.3, 42.3, 38.9, 32.4, 24.6,\n", - " 22.3, 13. , 5.7, 2.1, 4.4, 0. , 0. , 0.2, 0. , 1.4, 0. ,\n", - " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. 
, 0. , 0. , 0. , 0. , 0. , 0.1, 0.5, 1.4, 0.1,\n", - " 0.1, 0. , 1.3, 1.1, 0.5, 0. , 0.6, 0.4, 0. , 0. , 0. ,\n", - " 0. , 0.1, 0. , 0.5, 0.3, 0. , 0. , 0.1, 0.3, 0.5, 1.4,\n", - " 0.5, 0.9, 0. , 0.4, 0. , 0. , 0. , 0.2, 0.3, 0.8, 0.9,\n", - " 0.6, 0. , 0.2, 0.8, 1.7, 0.7, 0.8, 0.4, 0.2, 0.6])},\n", - " 3: {'x': array([ 6000., 12000., 18000., 24000., 30000., 36000., 42000.,\n", - " 48000., 54000., 60000., 66000., 72000., 78000., 84000.,\n", - " 90000., 96000., 102000., 108000., 114000., 120000., 126000.,\n", - " 132000., 138000., 144000., 150000., 156000., 162000., 168000.,\n", - " 174000., 180000., 186000., 192000., 198000., 204000., 210000.,\n", - " 216000., 222000., 228000., 234000., 240000., 246000., 252000.,\n", - " 258000., 264000., 270000., 276000., 282000., 288000., 294000.,\n", - " 300000., 306000., 312000., 318000., 324000., 330000., 336000.,\n", - " 342000., 348000., 354000., 360000., 366000., 372000., 378000.,\n", - " 384000., 390000., 396000., 402000., 408000., 414000., 420000.,\n", - " 426000., 432000., 438000., 444000., 450000., 456000., 462000.,\n", - " 468000., 474000., 480000., 486000., 492000., 498000., 504000.,\n", - " 510000., 516000., 522000., 528000., 534000., 540000., 546000.,\n", - " 552000., 558000., 564000., 570000., 576000., 582000., 588000.,\n", - " 594000., 600000., 606000., 612000., 618000., 624000., 630000.,\n", - " 636000., 642000., 648000., 654000., 660000., 666000., 672000.,\n", - " 678000., 684000., 690000., 696000., 702000., 708000., 714000.,\n", - " 720000.]),\n", - " 'y': array([-2595.02411546, -626.96935183, -1131.43018822, -1195.8927787 ,\n", - " -763.65612462, -472.72976861, -541.103559 , -563.46433626,\n", - " -626.21682835, -747.53263298, -441.85633566, -462.61558725,\n", - " -522.9066225 , -465.43199322, -538.96540754, -572.26465804,\n", - " -494.80899685, -477.13448196, -504.9487667 , -571.82776378,\n", - " -552.79505827, -516.10605354, -509.14388681, -472.19877673,\n", - " -479.54992597, -432.09361844, -436.73794658, 
-433.15148463,\n", - " -435.34208856, -435.28711362, -465.20989547, -452.17478931,\n", - " -411.51242034, -395.93722743, -388.59555641, -379.19863292,\n", - " -388.21011412, -411.35449381, -426.59421814, -373.31696818,\n", - " -379.87577367, -375.40659408, -388.49752338, -389.30409551,\n", - " -334.95160598, -358.35489009, -350.93179106, -302.88536045,\n", - " -354.25129305, -335.79367604, -329.38243738, -321.67400011,\n", - " -319.01441642, -300.8265001 , -328.50121454, -309.93972774,\n", - " -331.92052863, -306.04063366, -282.2496594 , -298.84995348,\n", - " -246.15496935, -258.3800972 , -281.64338628, -140.45881047,\n", - " -37.29127031, -36.0559548 , -53.16869903, -46.26902494,\n", - " -49.97242236, -58.32966641, -93.352066 , -24.35904422,\n", - " -24.31791836, -20.9612136 , -39.49524233, -18.08699778,\n", - " -19.01596288, -26.04138062, -6.36591305, -6.38827493,\n", - " -15.96207608, -8.48960392, -13.44429045, -7.65279237,\n", - " -13.20140687, -13.82495993, -7.44101084, -9.68309375,\n", - " -10.20882696, -9.96731366, -6.6257758 , -7.2165802 ,\n", - " -8.10285364, -8.4586155 , -7.43066755, -6.78135098,\n", - " -5.92453175, -5.88137233, -5.00803298, -5.85961277,\n", - " -5.54659623, -5.13442338, -4.92646169, -5.14089609,\n", - " -6.60575334, -4.85870038, -4.65111146, -4.25241617,\n", - " -3.71307123, -4.3216041 , -4.33601475, -3.61835982,\n", - " -3.03441758, -3.66246107, -3.1128362 , -3.35817505,\n", - " -2.96271382, -3.05400754, -2.73293895, -2.98145527]),\n", - " 'c': array([70.7, 68.5, 73.3, 73.5, 72.6, 71. , 71.4, 71.5, 71.9, 71.9, 68.3,\n", - " 71.6, 73.3, 73.2, 74.1, 74. , 73.4, 73. , 74. , 75. , 74.6, 73.3,\n", - " 72.2, 69.1, 69.6, 68.8, 68.2, 69. , 70.5, 72.1, 66.9, 67.9, 72.9,\n", - " 69.1, 66.4, 67.3, 69.8, 67.6, 68.9, 65.8, 64.5, 64.8, 68.1, 67.7,\n", - " 63.4, 66.8, 63.9, 59.2, 66.9, 63.3, 61.7, 60.1, 61.7, 57.3, 58.4,\n", - " 56.6, 60.1, 47.5, 49. , 56.6, 42.3, 30.4, 43. 
, 23.3, 18.5, 10.2,\n", - " 18.8, 13.3, 14.3, 12.4, 17.5, 5.6, 17.3, 3.3, 5.7, 0. , 0.3,\n", - " 2.7, 0.2, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", - " 0.5, 0. , 0. , 0.8, 0.3, 0. , 0.2, 0.2, 0. , 0.3])},\n", - " 4: {'x': array([ 6000., 12000., 18000., 24000., 30000., 36000., 42000.,\n", - " 48000., 54000., 60000., 66000., 72000., 78000., 84000.,\n", - " 90000., 96000., 102000., 108000., 114000., 120000., 126000.,\n", - " 132000., 138000., 144000., 150000., 156000., 162000., 168000.,\n", - " 174000., 180000., 186000., 192000., 198000., 204000., 210000.,\n", - " 216000., 222000., 228000., 234000., 240000., 246000., 252000.,\n", - " 258000., 264000., 270000., 276000., 282000., 288000., 294000.,\n", - " 300000., 306000., 312000., 318000., 324000., 330000., 336000.,\n", - " 342000., 348000., 354000., 360000., 366000., 372000., 378000.,\n", - " 384000., 390000., 396000., 402000., 408000., 414000., 420000.,\n", - " 426000., 432000., 438000., 444000., 450000., 456000., 462000.,\n", - " 468000., 474000., 480000., 486000., 492000., 498000., 504000.,\n", - " 510000., 516000., 522000., 528000., 534000., 540000., 546000.,\n", - " 552000., 558000., 564000., 570000., 576000., 582000., 588000.,\n", - " 594000., 600000., 606000., 612000., 618000., 624000., 630000.,\n", - " 636000., 642000., 648000., 654000., 660000., 666000., 672000.,\n", - " 678000., 684000., 690000., 696000., 702000., 708000., 714000.,\n", - " 720000.]),\n", - " 'y': array([-1550.3030233 , -1194.16134375, -547.57532582, -493.81109201,\n", - " -437.57301663, -549.26819188, -480.53350037, -475.20255316,\n", - " -521.82564544, -495.79329059, -497.71918014, -452.92134434,\n", - " -451.136396 , -417.63775488, -419.36086728, -375.53836589,\n", - " -371.4125876 , -378.32348377, -360.87733368, -338.07126206,\n", - " -350.05825794, -322.89697983, -317.32738299, -350.74750673,\n", - " 
-306.42923738, -305.67552043, -322.8665141 , -306.34129108,\n", - " -311.33556946, -324.36342826, -307.38141463, -338.4538372 ,\n", - " -329.61799469, -327.34825033, -302.85241539, -302.36759671,\n", - " -322.55479976, -306.28983573, -298.29725025, -287.77241567,\n", - " -309.39093364, -325.4298063 , -306.73540565, -303.9549021 ,\n", - " -312.9440672 , -279.2545339 , -294.21470557, -304.48225366,\n", - " -230.75983679, -251.61120584, -243.16939196, -119.25630518,\n", - " -112.18839761, -178.80070378, -52.21445805, -39.78892998,\n", - " -71.21581147, -22.72713195, -7.86372062, -7.85573913,\n", - " -6.81612832, -11.20393364, -16.51379594, -8.58449148,\n", - " -7.43806799, -5.73170687, -8.90419091, -7.72675621,\n", - " -13.95507499, -6.84218247, -8.31632738, -7.47147952,\n", - " -5.60119446, -4.12198354, -4.28636171, -6.93778368,\n", - " -183.26089237, -96.17466364, -4.91467823, -4.32119196,\n", - " -4.27990747, -4.16174641, -3.81602239, -6.1748364 ,\n", - " -6.21093074, -5.65926038, -6.04845082, -4.1200113 ,\n", - " -4.5630639 , -4.86188794, -3.98616056, -3.91942842,\n", - " -4.28392977, -4.41924144, -4.54479538, -4.92331199,\n", - " -3.23931337, -3.4145908 , -2.86732281, -3.07239207,\n", - " -3.0413795 , -3.27620814, -2.75215809, -4.5498506 ,\n", - " -3.43125751, -3.21464912, -3.01030865, -3.16263018,\n", - " -2.61943788, -2.93399824, -2.65181488, -3.28609981,\n", - " -3.33049908, -3.19436275, -2.76401025, -2.68017314,\n", - " -2.45776323, -9.37469983, -332.48028013, -356.40135888]),\n", - " 'c': array([69.7, 72.7, 67.7, 70.9, 72.2, 73.1, 74.2, 74.9, 75. , 74.8, 75. ,\n", - " 74.5, 74.5, 73.6, 72.9, 65.6, 71.4, 74. , 69. , 65.9, 72. , 67. ,\n", - " 65. , 63.9, 62.7, 62.6, 64. , 62.3, 64.9, 67.5, 64.2, 67.3, 64.4,\n", - " 65.2, 64. , 58.6, 62.3, 59. , 58.1, 54.2, 60.2, 67.2, 61.6, 55.7,\n", - " 59.1, 52.7, 55.5, 58.7, 42.5, 46.9, 45.4, 22.5, 24.5, 39.1, 13.5,\n", - " 10.9, 17.8, 5.5, 2.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. , 0. , 0. 
, 0. , 0.5, 5.9, 29.4,\n", - " 23.4, 1.3, 0.6, 1.8, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.2, 0.2, 0. ,\n", - " 0.3, 0.8, 1.2, 1.2, 0.3, 1.1, 0.8, 1.3, 1. , 0.9, 1.5,\n", - " 0.6, 2. , 3.2, 0.7, 0.4, 1. , 0.6, 6.5, 36.6, 43.3])}},\n", + "{'ppo': {0: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([ -88.15775064, -110.80896017, -116.10320637, -119.46548994,\n", + " -92.09106174, -64.25393533, -47.48677165, -69.30662023,\n", + " -60.30467331, -88.17607396, -23.59048325, -90.87252346,\n", + " -15.25030234, -8.78282656, -5.79158202, -7.30442645]),\n", + " 'z': array([ 4.15811497, 23.13705735, 10.31592974, 8.94511162, 5.67150428,\n", + " 39.36820806, 31.65101058, 33.26866465, 30.23007339, 14.44750956,\n", + " 17.70665165, 2.97173683, 6.8311226 , 1.72437304, 2.47379934,\n", + " 3.92283254]),\n", + " 'c': array([28.1, 28.2, 31.7, 34.4, 26.6, 14.7, 13.2, 17.9, 16.4, 23.1, 7. ,\n", + " 22.4, 2.9, 0.2, 0. , 0. ])},\n", + " 1: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([-67.91610023, -53.40671503, -64.62514255, -66.35840838,\n", + " -87.81387889, -94.22731699, -90.24905498, -58.79075258,\n", + " -44.35972097, -61.40454903, -9.30498245, -8.80209441,\n", + " -7.6044643 , -7.59277906, -4.68156383, -4.40672849]),\n", + " 'z': array([30.728283 , 29.74882595, 25.55996407, 30.31901437, 11.65889713,\n", + " 4.10021515, 16.68536504, 26.20849348, 38.80456634, 31.32378191,\n", + " 2.48632591, 1.37589148, 1.81055437, 1.06387001, 1.05117242,\n", + " 1.70916467]),\n", + " 'c': array([20.8, 17. , 20.5, 18.4, 28.6, 29.4, 26.4, 16.6, 9.4, 16.2, 0. ,\n", + " 0. , 0. , 0. , 0. , 0. 
])},\n", + " 2: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([-100.39980478, -73.04375601, -93.34557955, -89.92046596,\n", + " -85.59018073, -51.13473029, -81.70017771, -70.21202545,\n", + " -60.47638877, -60.65871857, -65.39839124, -35.76429277,\n", + " -13.97048527, -7.68179415, -10.19019997, -8.95580367]),\n", + " 'z': array([ 2.19992819, 21.34051787, 3.05532206, 3.3999837 , 1.65030495,\n", + " 37.66485366, 7.77635885, 26.57867691, 33.14778177, 34.87602746,\n", + " 20.90141774, 22.53970922, 1.42179221, 1.70779423, 1.46701376,\n", + " 1.61871985]),\n", + " 'c': array([31.3, 22.3, 34.5, 31.7, 31.2, 16.5, 25.6, 21. , 16. , 15.5, 16.3,\n", + " 9.2, 0.9, 0. , 0. , 0. ])},\n", + " 3: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([-83.02402423, -71.77818481, -82.05796674, -90.54551854,\n", + " -85.56271133, -85.31685532, -84.4121318 , -90.78979229,\n", + " -47.90072649, -25.51680556, -8.27271889, -11.85757268,\n", + " -15.75341708, -8.37725403, -4.88154136, -6.44937024]),\n", + " 'z': array([ 3.78417064, 22.99658609, 0.57995264, 2.58985925, 2.99997822,\n", + " 3.71056011, 4.35576055, 3.16850439, 29.38641626, 21.39618361,\n", + " 4.74721975, 3.30760283, 5.2505719 , 1.44092333, 1.77630397,\n", + " 2.14031609]),\n", + " 'c': array([29.4, 23.5, 32.2, 33.2, 27.5, 28.9, 26.5, 33.3, 13.5, 6.4, 0.5,\n", + " 1.5, 1.5, 0. , 0. , 0. 
])},\n", + " 4: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([ -78.25603452, -88.15755932, -80.85823162, -135.88484029,\n", + " -118.20620823, -84.56251778, -99.15056706, -61.05523057,\n", + " -45.49561499, -57.24070411, -59.42362142, -43.99554657,\n", + " -35.63985117, -40.93778084, -32.83323752, -21.4385759 ]),\n", + " 'z': array([21.53768478, 22.37562905, 2.08860279, 5.38289836, 7.55217166,\n", + " 5.50235804, 21.73379088, 40.02883866, 36.12166915, 30.18096517,\n", + " 28.59859709, 31.14269571, 28.420123 , 25.70969362, 20.99195807,\n", + " 6.80190971]),\n", + " 'c': array([27. , 24.6, 29.4, 37.2, 35.9, 29.3, 28.7, 18.7, 12.9, 16.6, 17.1,\n", + " 10.9, 8.6, 10.9, 8.6, 2.8])},\n", + " 5: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([ -95.01403974, -123.79892124, -102.7442057 , -89.815646 ,\n", + " -39.66297729, -46.9252954 , -67.41175488, -11.8282033 ,\n", + " -14.71989308, -6.40495247, -10.55180597, -6.38568054,\n", + " -6.68760819, -4.69920376, -4.80615511, -4.50284142]),\n", + " 'z': array([14.30988285, 12.37001644, 9.79027082, 21.98219231, 34.02801021,\n", + " 34.4690545 , 31.91678882, 3.58677587, 3.01341989, 1.91630836,\n", + " 1.59130328, 1.15350047, 1.79735957, 1.34250451, 1.36681713,\n", + " 1.08033008]),\n", + " 'c': array([24.3, 31.7, 29. , 26.1, 11.1, 10.9, 16.8, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. 
])},\n", + " 6: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([-65.32806028, -83.67334789, -83.91174635, -78.10015514,\n", + " -73.11778666, -67.93214535, -48.66061502, -64.25442483,\n", + " -60.3782076 , -17.24724205, -13.53857026, -10.07927537,\n", + " -7.24454048, -7.77040104, -5.06210837, -6.25896362]),\n", + " 'z': array([31.58735223, 0.74757479, 0.71705823, 21.69671301, 27.93631289,\n", + " 26.89142816, 34.08134048, 23.3786628 , 23.3090608 , 16.39541779,\n", + " 15.96755933, 1.22367773, 3.40950729, 1.41821465, 1.07917687,\n", + " 2.53668562]),\n", + " 'c': array([19.4, 31.3, 33.9, 27. , 17.9, 18.5, 14.3, 18. , 15.4, 4.4, 1.6,\n", + " 0. , 0. , 0. , 0. , 0. ])},\n", + " 7: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([ -73.76297787, -84.11068413, -99.08956487, -102.73139077,\n", + " -65.98193177, -63.72649271, -66.35557655, -47.00841884,\n", + " -27.83754147, -58.65351462, -17.91631418, -31.3729593 ,\n", + " -11.30332785, -8.96608866, -6.9142119 , -9.28875209]),\n", + " 'z': array([18.64289795, 1.06408548, 4.642824 , 4.91191071, 27.02547765,\n", + " 30.32041875, 34.69096995, 35.3872896 , 28.99568217, 32.88988769,\n", + " 5.5118178 , 10.67491732, 2.00321804, 1.42482125, 2.24978546,\n", + " 0.61636763]),\n", + " 'c': array([25.5, 33.4, 33.8, 33.9, 21.2, 19.4, 16.8, 13.5, 6.4, 14.1, 0.2,\n", + " 2.1, 0. , 0. , 0. , 0. 
])},\n", + " 8: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([-76.14318458, -38.61504398, -77.26428041, -90.65647631,\n", + " -73.78944513, -50.69761194, -84.72772427, -19.97462745,\n", + " -15.71906944, -11.7142282 , -16.10601562, -6.08499132,\n", + " -5.43795618, -5.63244445, -5.78452319, -5.43550785]),\n", + " 'z': array([18.47014595, 40.93644303, 19.87684185, 4.00407135, 19.84974271,\n", + " 34.03450903, 8.58124619, 12.31322311, 14.16381784, 2.02424316,\n", + " 3.50018079, 2.39915506, 1.94608352, 1.75336086, 2.00400833,\n", + " 2.5019934 ]),\n", + " 'c': array([23.9, 13.2, 22.2, 25.3, 22.5, 14.3, 22.7, 5. , 1.8, 0. , 0.9,\n", + " 0. , 0. , 0. , 0. , 0. ])},\n", + " 9: {'x': array([ 3000., 6000., 9000., 12000., 15000., 18000., 21000., 24000.,\n", + " 27000., 30000., 33000., 36000., 39000., 42000., 45000., 48000.]),\n", + " 'y': array([ -87.11240468, -88.58981476, -116.05483179, -89.40426019,\n", + " -83.31175542, -84.47527842, -45.14124752, -54.92142752,\n", + " -25.00155725, -23.71708584, -16.51391822, -9.73139814,\n", + " -15.01075531, -6.56120037, -8.15728398, -6.27575905]),\n", + " 'z': array([ 3.62630277, 9.51038404, 9.04789158, 5.28366629, 13.67574873,\n", + " 2.53608348, 27.87970085, 29.28781756, 16.22092121, 19.62062683,\n", + " 4.48558986, 1.72619632, 2.37281374, 1.07180294, 0.71999168,\n", + " 1.37417581]),\n", + " 'c': array([29.7, 26.3, 34. , 28.4, 25.4, 26.7, 12.6, 15.7, 7.2, 4.2, 0.1,\n", + " 0. , 0. , 0. , 0. , 0. 
])}},\n", " 'sac': {0: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", - " 8500., 9000., 9500., 10000., 10500., 11000., 11500., 12000.,\n", - " 12500., 13000., 13500., 14000., 14500., 15000., 15500., 16000.,\n", - " 16500., 17000., 17500., 18000., 18500., 19000., 19500., 20000.,\n", - " 20500., 21000., 21500., 22000., 22500., 23000., 23500., 24000.,\n", - " 24500., 25000., 25500., 26000., 26500., 27000., 27500., 28000.,\n", - " 28500., 29000., 29500., 30000., 30500., 31000., 31500., 32000.,\n", - " 32500., 33000., 33500., 34000., 34500., 35000., 35500., 36000.,\n", - " 36500., 37000., 37500., 38000., 38500., 39000., 39500., 40000.,\n", - " 40500., 41000., 41500., 42000., 42500., 43000., 43500., 44000.,\n", - " 44500., 45000., 45500., 46000., 46500., 47000., 47500., 48000.,\n", - " 48500., 49000., 49500., 50000.]),\n", - " 'y': array([-2207.06773831, -2117.67943905, -551.04386437, -590.05304758,\n", - " -2831.79414787, -566.03010933, -6588.12435863, -964.10067527,\n", - " -631.16948533, -460.21299395, -463.59737553, -445.37279494,\n", - " -436.23489907, -434.64780706, -601.04822929, -430.79555494,\n", - " -446.63970408, -474.63919714, -447.04893141, -444.06146417,\n", - " -457.44239881, -443.11593619, -443.49116007, -452.33985722,\n", - " -457.62808993, -458.89936599, -441.09380075, -450.33577102,\n", - " -442.64810277, -532.07590241, -426.62426052, -440.82777403,\n", - " -467.3498813 , -415.01285843, -417.16557887, -440.69252751,\n", - " -423.01242895, -420.16424804, -697.54827221, -530.10647135,\n", - " -461.33400932, -455.23493614, -432.47789691, -425.97486177,\n", - " -423.33507737, -566.35222126, -462.78532791, -589.80277173,\n", - " -569.52402762, -491.26496572, -452.30023126, -462.41720115,\n", - " -458.73025193, -445.01151934, -501.89308882, -442.73961928,\n", - " -434.40621482, -441.20753129, -447.78775366, -415.18201438,\n", - " -396.13880542, -406.74219593, -407.4954307 
, -405.75044259,\n", - " -374.37702905, -462.11623021, -402.86422688, -387.01668285,\n", - " -1978.48893089, -424.5550627 , -426.32978458, -462.91302006,\n", - " -412.8554165 , -351.29712272, -398.05518014, -431.02593602,\n", - " -442.07817245, -443.78263741, -414.40314742, -410.39447363,\n", - " -406.3826611 , -389.15277467, -402.15158614, -390.30438573,\n", - " -407.34766499, -378.58587573, -392.50121527, -390.63268597,\n", - " -307.65739018, -408.64016364, -420.76629092, -407.73827645,\n", - " -466.8192808 , -460.35901712, -401.34921201, -423.85426561,\n", - " -418.30363483, -418.03619657, -412.83595358, -421.00202948]),\n", - " 'c': array([66.5, 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. ,\n", - " 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. ,\n", - " 75. , 75. , 75. , 74.4, 73.3, 73.8, 73.3, 68.1, 75. , 75. , 75. ,\n", - " 75. , 75. , 75. , 71.7, 60.9, 75. , 75. , 75. , 75. , 75. , 74.7,\n", - " 73.4, 72.7, 71.8, 72.8, 72.7, 72.7, 71. , 71. , 72. , 71.6, 71.3,\n", - " 75. , 71.1, 70.8, 70.3, 69.7, 62.7, 65.2, 60.6, 65.1, 54.1, 70.1,\n", - " 71.1, 67.3, 58.2, 68.5, 72. , 72.6, 72.4, 62.7, 70.2, 72.6, 72.7,\n", - " 71.3, 70.8, 73.4, 72.8, 74.2, 73.6, 72.2, 74. , 72.8, 70.1, 69.6,\n", - " 66.1, 66.6, 71.6, 73.7, 73.2, 75. , 64.4, 74.8, 74.3, 75. 
, 70.6,\n", - " 66.1])},\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-78.07790812, -60.33753125, -29.63921306, -60.75737487,\n", + " -7.19386485, -7.20750915, -7.70115104, -8.68704632,\n", + " -8.42646033, -5.86816584, -5.69042965, -5.18544206,\n", + " -6.0289549 , -6.30382655, -5.5702274 , -5.2551961 ,\n", + " -5.84007806, -6.97435863, -5.74449092, -5.75024993]),\n", + " 'z': array([19.84491073, 43.95713772, 34.81889161, 34.62960477, 2.04793161,\n", + " 1.64875443, 1.33900012, 2.53493957, 1.34047503, 1.36565155,\n", + " 1.11282274, 0.48880285, 1.24949952, 1.31641216, 1.07702055,\n", + " 1.48261893, 1.32239444, 1.60655968, 1.15342742, 0.89049145]),\n", + " 'c': array([24.5, 14.7, 7.5, 15.1, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ])},\n", + " 1: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-65.24821296, -24.43987314, -17.54505894, -12.43690074,\n", + " -7.20531116, -6.61096602, -6.97821511, -5.51898878,\n", + " -6.37463325, -5.33324126, -5.37788904, -5.27448811,\n", + " -5.72307335, -6.01937392, -4.894859 , -5.92607578,\n", + " -6.53192607, -6.29820647, -5.89273643, -6.85415147]),\n", + " 'z': array([33.64595278, 37.50972551, 8.71057061, 7.02629526, 1.74683554,\n", + " 1.84420431, 2.22292818, 1.93067024, 1.31936557, 1.82069635,\n", + " 1.58433952, 1.42841273, 1.47304957, 1.52337978, 1.24834965,\n", + " 1.13248805, 2.35201219, 1.79630743, 1.229116 , 1.68602352]),\n", + " 'c': array([18.2, 5.4, 0.9, 0.4, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. 
])},\n", + " 2: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-69.35735261, -57.95267831, -27.03133304, -7.35251776,\n", + " -9.74536485, -5.66213119, -5.26494809, -5.45806351,\n", + " -5.0730373 , -6.60276861, -5.35123771, -4.64795848,\n", + " -7.17886732, -6.10599183, -7.2799352 , -6.40995668,\n", + " -5.79168466, -5.55089296, -6.06540081, -5.7944525 ]),\n", + " 'z': array([28.87250967, 31.3791714 , 26.16133977, 2.29118464, 2.40681472,\n", + " 1.4120881 , 1.05344095, 1.29049847, 1.49915448, 1.99894645,\n", + " 2.07842612, 1.21846415, 0.96939657, 1.49616192, 1.39116869,\n", + " 1.58489047, 1.05979279, 0.56014305, 1.18636849, 1.20012059]),\n", + " 'c': array([20.3, 13.9, 5.8, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ])},\n", + " 3: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -69.06920537, -62.39885709, -20.20209215, -23.24543596,\n", + " -8.60319992, -8.480007 , -6.59101948, -12.26020956,\n", + " -8.86206397, -7.74456537, -10.05795771, -10.10407839,\n", + " -9.98968216, -10.61858908, -750.93058322, -375.49741798,\n", + " -268.30870606, -280.79743834, -259.59869236, -261.78042475]),\n", + " 'z': array([23.03818139, 35.47001515, 12.1680979 , 8.33074529, 2.43479351,\n", + " 2.92771657, 2.22521414, 4.43897227, 1.09660478, 1.46274702,\n", + " 2.58978269, 1.91678454, 2.62566084, 2.37970549, 20.79951087,\n", + " 10.53053647, 5.73098006, 4.79021059, 2.81688544, 1.60900841]),\n", + " 'c': array([20.2, 13.1, 0.4, 2.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 46.1, 45. , 44.9, 45. 
, 45.4, 44.1])},\n", + " 4: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -76.89032409, -44.50686882, -45.47539624, -6.65421714,\n", + " -9.32473289, -6.75548172, -5.52250068, -6.1608249 ,\n", + " -4.58609088, -6.3651977 , -15.0163006 , -26.62013013,\n", + " -182.21047214, -441.53744631, -456.34930916, -547.62863901,\n", + " -337.98687975, -146.25134802, -130.88974202, -137.36593503]),\n", + " 'z': array([12.69779152, 38.43783449, 33.67128192, 3.89107248, 1.68747783,\n", + " 1.73313469, 1.31463171, 2.24104178, 1.8563587 , 2.1101253 ,\n", + " 10.087443 , 17.83370861, 27.84269648, 39.78186747, 33.38760259,\n", + " 35.8320893 , 22.40331481, 2.71972203, 1.67895073, 5.14868679]),\n", + " 'c': array([19.5, 11.8, 8. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 6.2, 21.4, 34.6, 38.3, 38.8, 38.6, 35.7, 36. , 19.9])},\n", + " 5: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-65.89964499, -52.82131708, -43.08648805, -12.17284954,\n", + " -9.77086312, -7.57493878, -5.93784215, -6.13931906,\n", + " -7.16364275, -5.33226168, -6.34258326, -7.66062225,\n", + " -9.75888468, -6.33416813, -6.97436693, -6.3959969 ,\n", + " -8.33078707, -7.07954414, -7.05645984, -6.17812513]),\n", + " 'z': array([33.47902333, 39.40488504, 35.05966812, 12.86299352, 4.20217461,\n", + " 2.18442224, 1.97054841, 2.76348552, 2.43071494, 0.77255432,\n", + " 1.07973661, 1.67228861, 2.69925011, 1.45342565, 1.76673998,\n", + " 0.90334686, 1.01542112, 1.46867459, 1.19474122, 1.0804704 ]),\n", + " 'c': array([18.6, 13.2, 8.9, 1.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. 
])},\n", + " 6: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -73.12660912, -66.87809109, -26.98915078, -15.85976783,\n", + " -7.38437392, -6.35743866, -6.20616661, -6.66104479,\n", + " -9.78754722, -5.70179199, -7.10141513, -7.32442731,\n", + " -7.63959533, -325.1037648 , -2197.01475267, -619.32015768,\n", + " -417.56966432, -387.68059481, -335.28691077, -210.81929253]),\n", + " 'z': array([ 23.79420361, 31.00769587, 21.34951728, 10.13177236,\n", + " 3.55824778, 2.17232342, 2.55821224, 1.37710935,\n", + " 2.76360721, 1.67922421, 2.21043593, 1.45907418,\n", + " 2.18957235, 390.26788595, 11.8424026 , 5.43033293,\n", + " 1.87728046, 2.81303186, 3.44426974, 15.40038201]),\n", + " 'c': array([21. , 15.2, 3. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 13.2, 47. , 47. , 47. , 46.8, 45.1, 42.1])},\n", + " 7: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-86.14745349, -69.82938704, -60.55254439, -14.04169523,\n", + " -7.34542197, -7.2115004 , -10.55462428, -9.00398941,\n", + " -10.55297465, -10.35577563, -10.16653991, -8.10488631,\n", + " -9.65560524, -6.9296579 , -8.60702596, -7.21454063,\n", + " -13.10179014, -5.93049151, -5.92612057, -26.02003013]),\n", + " 'z': array([ 2.18792122, 29.35488398, 44.26681783, 9.05993667, 1.98442795,\n", + " 2.47778851, 3.42120682, 2.97468921, 1.86665993, 2.15497783,\n", + " 2.75509479, 2.1312129 , 2.7818385 , 2.26897937, 1.6689237 ,\n", + " 1.67910899, 9.38348996, 0.84031475, 1.26995393, 10.14208448]),\n", + " 'c': array([25.6, 16.7, 14.1, 0.4, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 1.6, 0. , 0. 
, 0.3])},\n", + " 8: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-74.9859112 , -69.01984779, -23.65836429, -31.19489791,\n", + " -16.4698042 , -6.11075637, -6.71493862, -5.94775284,\n", + " -6.67796766, -6.54471716, -9.22281525, -5.90887619,\n", + " -6.03234983, -5.61588372, -7.04708809, -4.71964615,\n", + " -6.39281846, -8.10944542, -7.17309419, -5.72201423]),\n", + " 'z': array([22.52407981, 20.69104383, 13.71149863, 22.93448607, 8.89372125,\n", + " 1.70029396, 2.28735753, 1.35420945, 2.20279422, 2.44834516,\n", + " 3.2820735 , 1.73847588, 1.28879417, 0.80597151, 1.28274854,\n", + " 1.18829384, 1.74807889, 2.22482051, 1.68242802, 1.69691565]),\n", + " 'c': array([25.1, 18. , 6.2, 10.7, 2.9, 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ])},\n", + " 9: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-69.60505268, -62.10412986, -24.58642659, -10.59314954,\n", + " -10.67013602, -7.78939339, -7.13856097, -5.61309561,\n", + " -6.18964005, -6.57873348, -7.34552924, -8.00388536,\n", + " -5.64702154, -7.41298009, -7.7534408 , -23.97246065,\n", + " -43.34849873, -15.53243225, -12.07979048, -6.82981321]),\n", + " 'z': array([25.43346732, 48.16056757, 26.83800791, 6.64509126, 4.08690817,\n", + " 2.95224512, 1.61868099, 1.12129537, 2.88810154, 2.20185379,\n", + " 2.06130777, 3.04401063, 0.98670066, 2.94783452, 3.23984619,\n", + " 12.53312986, 46.4391647 , 20.68746487, 5.66447056, 2.37961924]),\n", + " 'c': array([22.1, 13.1, 3.2, 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0.9, 4.5, 1.1, 0. , 0. 
])}},\n", + " 'td3': {0: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-85.94421168, -70.06199891, -88.3518554 , -58.78658976,\n", + " -35.89540506, -5.82941956, -4.9417647 , -7.37047535,\n", + " -7.24192199, -5.8245196 , -5.20987856, -5.32613607,\n", + " -7.02438922, -6.00310981, -6.16076265, -6.06208038,\n", + " -5.80742577, -5.99900247, -5.55101179, -6.51476477]),\n", + " 'z': array([22.00836082, 28.41609996, 19.45257435, 37.81685459, 40.53740952,\n", + " 1.76770043, 1.69423616, 2.9786095 , 2.67761847, 1.45400024,\n", + " 1.40209404, 1.18736774, 1.70186473, 1.83284751, 2.03263634,\n", + " 2.03417312, 1.80392433, 2.06413943, 1.74770794, 1.64181311]),\n", + " 'c': array([22.7, 20.5, 23.8, 13. , 4.7, 0.2, 0. , 0. , 0. , 0. , 0. ,\n", + " 0.2, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ])},\n", + " 1: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-88.1460536 , -45.94947908, -72.08559651, -57.45342627,\n", + " -74.96474968, -60.11658654, -25.26550983, -29.32336914,\n", + " -9.07853327, -7.60492815, -6.37685471, -6.64473372,\n", + " -5.45837994, -5.88706237, -6.26822371, -6.13553622,\n", + " -7.28173944, -6.52344741, -7.07458376, -16.89156441]),\n", + " 'z': array([28.49763712, 41.19661576, 19.41998835, 31.86997608, 28.947596 ,\n", + " 31.18194991, 21.98156012, 28.64739858, 1.65128532, 2.71596572,\n", + " 1.99293239, 1.79777877, 1.81693931, 2.34535738, 1.40003113,\n", + " 1.36194808, 2.30500656, 2.02489599, 1.36382605, 15.7198095 ]),\n", + " 'c': array([22.7, 13.3, 19.7, 14.8, 17.8, 13.8, 3.6, 4.7, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. 
, 2.4])},\n", + " 2: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -98.5289618 , -102.24355779, -84.02528359, -107.08753813,\n", + " -87.27047944, -73.89035748, -12.0645071 , -10.89341377,\n", + " -10.99018025, -27.67190238, -20.66131911, -13.40694394,\n", + " -13.78103301, -130.4538544 , -150.61173877, -79.03635174,\n", + " -59.44704119, -13.33556566, -18.26920826, -150.85806681]),\n", + " 'z': array([25.62763485, 5.68424539, 26.56401701, 48.0067934 , 32.32528243,\n", + " 26.51638531, 2.42785593, 3.42010511, 4.62545024, 18.38454035,\n", + " 12.61100562, 8.99854172, 3.06996942, 98.39072559, 26.889394 ,\n", + " 19.7327986 , 35.45527924, 9.17399271, 19.00256783, 7.53924281]),\n", + " 'c': array([23. , 28.2, 21.4, 19.2, 17.6, 17.4, 0. , 0. , 0. , 0. , 0.3,\n", + " 1.6, 0. , 16.3, 32. , 22.7, 15.6, 3.5, 2.8, 32.3])},\n", + " 3: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-81.17712593, -93.9318703 , -93.34002398, -11.75896037,\n", + " -29.55926182, -13.30365237, -9.84147134, -15.48659984,\n", + " -9.30149957, -8.17068386, -33.37286029, -18.71896217,\n", + " -11.37618481, -9.01509544, -7.74705464, -9.75446444,\n", + " -5.50155901, -9.86711277, -11.08479701, -11.66659277]),\n", + " 'z': array([19.57616024, 27.5174342 , 21.51364681, 5.13931048, 15.87992909,\n", + " 11.39115668, 1.8216829 , 13.49331146, 3.47620495, 1.96155532,\n", + " 29.199426 , 13.10804325, 3.24274847, 2.61679213, 2.5945896 ,\n", + " 3.02973773, 2.92192891, 3.3473363 , 1.82037911, 3.84121644]),\n", + " 'c': array([26.3, 26.7, 21.6, 0.3, 7.2, 2.7, 0. , 1.5, 0. , 0. , 7.3,\n", + " 2.6, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. 
])},\n", + " 4: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-63.60283691, -99.64012494, -79.57277764, -64.42824744,\n", + " -40.91249357, -47.75957188, -40.79252709, -7.67718137,\n", + " -5.96233383, -8.9197078 , -6.99532597, -7.20078574,\n", + " -8.51271868, -6.11621019, -6.64270924, -6.46947919,\n", + " -8.10823296, -6.73752411, -7.31021255, -6.17837895]),\n", + " 'z': array([24.4758332 , 29.54834317, 20.36304887, 32.20340305, 35.95879232,\n", + " 29.97400078, 31.81034795, 3.13478173, 2.71438894, 4.76449562,\n", + " 5.32753978, 2.04918421, 2.03030747, 1.77817332, 1.51427702,\n", + " 1.45356767, 1.58260899, 1.84827779, 1.99469583, 1.95895778]),\n", + " 'c': array([18.4, 23.8, 21.8, 17. , 11.5, 13.9, 11.5, 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ])},\n", + " 5: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-84.17921624, -75.48351039, -83.36004308, -82.28601454,\n", + " -75.1940769 , -53.56318152, -61.15271655, -80.95176007,\n", + " -60.80490099, -34.6495125 , -23.22871368, -48.77044682,\n", + " -27.42559418, -8.91388802, -6.78943397, -7.98161263,\n", + " -8.09689424, -13.54414852, -12.82482307, -13.37233233]),\n", + " 'z': array([15.52965767, 23.11948828, 7.45922424, 16.53932144, 26.73992653,\n", + " 35.63630325, 43.9307807 , 38.96964603, 37.32594168, 27.84997745,\n", + " 23.53194294, 37.70893402, 23.91573772, 2.57305833, 1.91073188,\n", + " 1.73090874, 1.6957232 , 1.60634858, 2.72565049, 4.34738185]),\n", + " 'c': array([24.1, 22.3, 22.9, 23.1, 19.5, 14.8, 13.1, 19.4, 13.9, 7.7, 4.2,\n", + " 9.4, 4.9, 0. , 0. , 0. , 0. , 0. , 0. , 0. 
])},\n", + " 6: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-154.34850522, -120.10633844, -53.41207805, -65.04308116,\n", + " -36.90420265, -39.5696131 , -48.95626 , -22.0555279 ,\n", + " -9.89730637, -7.37245373, -7.37403052, -6.75279697,\n", + " -7.24332818, -6.78674071, -7.96264292, -7.94443069,\n", + " -7.69252758, -8.67176996, -9.87883256, -12.22547147]),\n", + " 'z': array([21.3295575 , 49.62113909, 38.43709574, 39.13836323, 21.8348643 ,\n", + " 30.03320835, 32.25098733, 18.59139114, 6.07970331, 1.16559597,\n", + " 1.84826624, 1.16065894, 1.75867394, 2.20756202, 0.7946329 ,\n", + " 1.19733069, 0.55129436, 1.11288335, 1.69025951, 3.03667543]),\n", + " 'c': array([29.6, 18.9, 13.5, 16.6, 10.9, 10.1, 11.2, 4.3, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ])},\n", + " 7: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-102.63481832, -172.71014804, -103.77656218, -24.05083644,\n", + " -7.66221092, -7.90928273, -8.46990309, -17.49743608,\n", + " -6.33918355, -45.11152547, -42.65535513, -7.68147045,\n", + " -6.60143647, -7.61578275, -6.34325229, -7.88846321,\n", + " -9.66269034, -6.74711147, -6.94283885, -7.09796546]),\n", + " 'z': array([19.05515338, 68.19663241, 41.56309264, 19.435171 , 4.26060053,\n", + " 4.27840437, 3.7185322 , 23.37100265, 2.54804485, 43.67874513,\n", + " 40.3249004 , 1.95806576, 1.83900344, 2.0136299 , 1.88739857,\n", + " 2.81153385, 4.09005582, 0.82518325, 0.86247953, 2.42411192]),\n", + " 'c': array([24.7, 20.6, 21.7, 4.2, 0. , 0.1, 0. , 2. , 0. , 8.6, 8.5,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. 
])},\n", + " 8: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-92.20479425, -19.47851148, -26.84153924, -60.18777517,\n", + " -16.55054841, -8.94515712, -12.34361683, -15.25074933,\n", + " -6.70738605, -17.09117543, -8.09510113, -9.69472508,\n", + " -11.64001963, -8.53629672, -8.25961659, -6.72398336,\n", + " -8.44622293, -10.03643956, -7.58991522, -8.72999117]),\n", + " 'z': array([16.34015432, 16.98137133, 18.47352252, 33.15342032, 10.28662385,\n", + " 4.74198897, 3.33383242, 13.4792587 , 2.5046929 , 14.97965586,\n", + " 2.91004502, 1.70101924, 1.32698971, 0.62890362, 1.36244358,\n", + " 1.54492238, 1.77453121, 3.93491631, 2.31683249, 1.91600621]),\n", + " 'c': array([22.5, 2.7, 5.4, 16.1, 2.3, 0. , 2.1, 3.4, 0. , 0.7, 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ])},\n", + " 9: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-90.7885944 , -87.11952652, -64.70260735, -19.88583916,\n", + " -11.20716592, -73.29198892, -21.61302407, -10.02653052,\n", + " -9.82398538, -6.7724848 , -9.30154277, -11.07868205,\n", + " -9.58243823, -7.53893544, -8.12099766, -10.73081034,\n", + " -13.73528459, -11.1555124 , -15.45595653, -29.84646264]),\n", + " 'z': array([ 6.77511907, 11.74216639, 37.61303157, 18.157194 , 7.29019766,\n", + " 18.47515421, 37.19238422, 3.34941509, 1.62778328, 1.69626118,\n", + " 2.1458154 , 3.67711941, 3.06687186, 2.09753463, 2.73303713,\n", + " 4.0618146 , 7.10106535, 2.65942251, 7.52857632, 37.59229216]),\n", + " 'c': array([25.7, 23.7, 17.4, 6.8, 2.6, 17.6, 4.3, 2.1, 1.1, 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. 
, 1.8])}},\n", + " 'ddpg': {0: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -95.95716586, -77.536507 , -90.2450936 , -73.37329898,\n", + " -82.54700179, -80.78333082, -51.6642578 , -64.8446082 ,\n", + " -76.75541837, -14.35556661, -85.20919299, -13.37806732,\n", + " -72.13630312, -206.19050725, -450.77263209, -509.61860935,\n", + " -209.64849755, -166.91184915, -126.29368466, -18.64014551]),\n", + " 'z': array([ 37.62329252, 20.1873807 , 26.12227757, 39.15299444,\n", + " 36.23030113, 40.00687884, 33.29503219, 35.15646545,\n", + " 40.20367932, 9.95610163, 21.40140716, 5.26507638,\n", + " 95.21710862, 110.63661417, 61.29253974, 51.43481752,\n", + " 28.04528254, 41.45146256, 30.83840654, 2.71839663]),\n", + " 'c': array([22.4, 23.5, 19.7, 18.2, 12.4, 16.2, 11.8, 16.5, 21.1, 3.6, 10.6,\n", + " 0. , 5.7, 15.3, 19.6, 31.3, 25.8, 25. , 15.3, 0. ])},\n", " 1: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", - " 8500., 9000., 9500., 10000., 10500., 11000., 11500., 12000.,\n", - " 12500., 13000., 13500., 14000., 14500., 15000., 15500., 16000.,\n", - " 16500., 17000., 17500., 18000., 18500., 19000., 19500., 20000.,\n", - " 20500., 21000., 21500., 22000., 22500., 23000., 23500., 24000.,\n", - " 24500., 25000., 25500., 26000., 26500., 27000., 27500., 28000.,\n", - " 28500., 29000., 29500., 30000., 30500., 31000., 31500., 32000.,\n", - " 32500., 33000., 33500., 34000., 34500., 35000., 35500., 36000.,\n", - " 36500., 37000., 37500., 38000., 38500., 39000., 39500., 40000.,\n", - " 40500., 41000., 41500., 42000., 42500., 43000., 43500., 44000.,\n", - " 44500., 45000., 45500., 46000., 46500., 47000., 47500., 48000.,\n", - " 48500., 49000., 49500., 50000.]),\n", - " 'y': array([-1.39978279e+04, -1.16337191e+04, -2.68644583e+02, -4.81148099e+02,\n", - " 
-5.37553632e+03, -9.65428582e+02, -4.47807589e+02, -6.01029205e+02,\n", - " -6.23635546e+02, -4.86614758e+02, -5.86516396e+02, -5.26575099e+02,\n", - " -5.99829531e+02, -9.83383993e+02, -1.22709868e+03, -6.63552712e+02,\n", - " -6.17231316e+02, -5.18178522e+02, -4.96102642e+02, -5.25783329e+02,\n", - " -5.66184133e+02, -5.39487841e+02, -6.24070572e+02, -5.12905280e+02,\n", - " -4.71160085e+02, -4.83385453e+02, -4.61873205e+02, -4.94672987e+02,\n", - " -2.69529493e+02, -6.25345326e+02, -4.65210493e+02, -5.22635239e+02,\n", - " -4.13307282e+02, -5.34696272e+02, -7.08614824e+02, -6.07681299e+02,\n", - " -4.35267591e+02, -4.43722224e+02, -4.71928035e+02, -4.35631926e+02,\n", - " -4.51979864e+02, -4.85855008e+02, -5.26781025e+02, -5.07743968e+02,\n", - " -5.55303974e+02, -4.26916411e+02, -4.47085951e+02, -4.48743111e+02,\n", - " -4.55254629e+02, -4.60964782e+02, -4.23541517e+02, -4.27516038e+02,\n", - " -4.07931778e+02, -3.85629784e+02, -4.64468752e+02, -3.22505433e+02,\n", - " -4.33003342e+02, -3.47729789e+02, -4.16001104e+02, -4.92683452e+02,\n", - " -5.16864610e+02, -4.87113742e+02, -4.09838037e+02, -4.37475618e+02,\n", - " -4.40755991e+02, -4.45760725e+02, -3.98495226e+02, -3.37347642e+02,\n", - " -4.04627345e+02, -4.25855615e+02, -3.24957222e+02, -3.98866528e+02,\n", - " -5.13136206e+02, -5.34672920e+02, -8.47490255e+02, -6.65226782e+02,\n", - " -5.74850167e+02, -4.36338131e+02, -2.97705722e+02, -2.95073105e+02,\n", - " -2.30767740e+02, -5.06974657e+02, -2.16164066e+02, -5.27535377e+02,\n", - " -4.94164650e+02, -2.63081044e+02, -1.29482152e+01, -5.00627982e+02,\n", - " -3.61175779e+02, -2.99146934e+02, -3.66197322e+01, -3.80255393e+01,\n", - " -1.76372415e+01, -3.73752621e+02, -4.16135963e+02, -2.68355811e+02,\n", - " -1.34110857e+01, -1.18275825e+02, -1.78695343e+02, -4.77207037e+02]),\n", - " 'c': array([68.2, 75. , 59. , 70.8, 75. , 75. , 72.5, 75. , 75. , 75. , 75. ,\n", - " 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. , 74.5, 75. , 74.8,\n", - " 75. 
, 74.9, 74. , 73.7, 60.7, 54.3, 37. , 74.7, 74. , 50.5, 69.8,\n", - " 74.4, 75. , 71.5, 69. , 74.8, 72.6, 74.9, 72.3, 71.6, 75. , 75. ,\n", - " 73.5, 75. , 68.5, 68.5, 68.4, 70.6, 75. , 75. , 73.6, 58.7, 74.2,\n", - " 44.1, 74.4, 59.8, 71.7, 75. , 75. , 63.6, 60.4, 71.6, 66.1, 64.5,\n", - " 60.9, 56.2, 74.2, 67.1, 40.7, 56.9, 52. , 54.4, 56.9, 58.3, 69.5,\n", - " 65.2, 49.6, 58.3, 37.6, 65.6, 58. , 75. , 74. , 47.8, 0. , 58.7,\n", - " 45. , 39.2, 9.2, 13.4, 7.3, 54.6, 55.5, 45.7, 1. , 17.1, 30.2,\n", - " 67.1])},\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -84.75858338, -87.40659822, -71.40335766, -164.30481958,\n", + " -99.39236823, -157.97967084, -97.35770974, -106.74897974,\n", + " -31.69817764, -18.91556815, -6.85103537, -5.91246398,\n", + " -10.91007875, -9.23343595, -8.19291483, -9.30628586,\n", + " -5.96941117, -30.53573469, -22.3662505 , -87.49797925]),\n", + " 'z': array([15.37321957, 4.09066969, 48.9356664 , 22.30673999, 29.46223032,\n", + " 12.82361635, 11.0853089 , 1.34894136, 19.03377157, 16.46351775,\n", + " 1.98202837, 1.4020216 , 2.33703549, 2.99290714, 1.00996253,\n", + " 2.05439719, 1.58743207, 23.6289253 , 32.87234004, 53.79426573]),\n", + " 'c': array([23. , 27.5, 17.7, 21.4, 20.6, 31.7, 24.7, 27.9, 7.9, 3.3, 0. ,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 6. 
, 3.6, 19.7])},\n", " 2: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", - " 8500., 9000., 9500., 10000., 10500., 11000., 11500., 12000.,\n", - " 12500., 13000., 13500., 14000., 14500., 15000., 15500., 16000.,\n", - " 16500., 17000., 17500., 18000., 18500., 19000., 19500., 20000.,\n", - " 20500., 21000., 21500., 22000., 22500., 23000., 23500., 24000.,\n", - " 24500., 25000., 25500., 26000., 26500., 27000., 27500., 28000.,\n", - " 28500., 29000., 29500., 30000., 30500., 31000., 31500., 32000.,\n", - " 32500., 33000., 33500., 34000., 34500., 35000., 35500., 36000.,\n", - " 36500., 37000., 37500., 38000., 38500., 39000., 39500., 40000.,\n", - " 40500., 41000., 41500., 42000., 42500., 43000., 43500., 44000.,\n", - " 44500., 45000., 45500., 46000., 46500., 47000., 47500., 48000.,\n", - " 48500., 49000., 49500., 50000.]),\n", - " 'y': array([ -419.88586153, -475.93118421, -506.05602028, -421.21010321,\n", - " -442.04410281, -419.8867473 , -733.38972642, -567.68615098,\n", - " -561.41979275, -551.65090128, -610.19774788, -11580.04250759,\n", - " -588.76837472, -577.91330549, -438.90882211, -351.55491758,\n", - " -511.2351893 , -441.68311177, -659.24799389, -415.74915622,\n", - " -380.77272297, -401.21784902, -407.82671221, -7521.76404587,\n", - " -6255.92575407, -462.03041279, -454.34549041, -624.00920391,\n", - " -589.08490456, -484.01619275, -471.51571345, -471.88553558,\n", - " -453.5311147 , -469.57877974, -435.59546103, -516.29733513,\n", - " -619.34171443, -591.87216081, -589.21659591, -419.7929405 ,\n", - " -456.44025254, -428.02778266, -593.25319192, -542.0477097 ,\n", - " -541.5520789 , -558.52901035, -553.19642527, -553.79373361,\n", - " -558.56120686, -516.49383055, -512.66534195, -697.33357848,\n", - " -876.82429058, -1098.07554216, -477.61280733, -451.74065975,\n", - " -402.50068199, -517.77069188, -598.29257578, -582.8069224 ,\n", - " -577.87749496, -563.14204888, 
-566.41514884, -541.71606806,\n", - " -8335.82760085, -10175.94399261, -618.2934174 , -587.77629693,\n", - " -608.33836633, -599.95524999, -564.8702342 , -574.4031588 ,\n", - " -480.29490336, -897.76520987, -648.83600589, -681.24317771,\n", - " -662.76882584, -724.81546013, -616.04125164, -782.29932119,\n", - " -520.26047829, -648.33646013, -436.91912141, -422.82299453,\n", - " -430.22012838, -524.30239139, -2967.19183164, -448.6750295 ,\n", - " -432.69993831, -414.5267237 , -529.19645949, -504.54926939,\n", - " -3856.07001263, -484.63436687, -444.50897046, -416.34501806,\n", - " -426.66330806, -436.18707116, -426.51229272, -418.83342933]),\n", - " 'c': array([65.1, 75. , 70.5, 68.7, 72.8, 70.3, 75. , 73.7, 72.5, 72.1, 71.7,\n", - " 75. , 75. , 75. , 73.8, 54.3, 75. , 75. , 72.7, 74.9, 53. , 53.7,\n", - " 68.3, 75. , 75. , 72.3, 67.9, 74.7, 74.8, 75. , 75. , 75. , 71.3,\n", - " 67.6, 68.4, 70.7, 72.5, 74.4, 72.8, 68.6, 73.6, 73.5, 75. , 75. ,\n", - " 75. , 75. , 75. , 74.2, 75. , 75. , 75. , 75. , 75. , 75. , 62.9,\n", - " 75. , 63.7, 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. , 75. ,\n", - " 75. , 74.8, 75. , 75. , 74.5, 74.8, 71.9, 75. , 75. , 75. , 75. ,\n", - " 74.7, 74.9, 75. , 74.5, 74.9, 75. , 70.3, 67.1, 75. , 68.8, 74. ,\n", - " 75. , 74.8, 69.1, 75. , 75. , 75. , 68.8, 72.1, 74.3, 72.7, 75. ,\n", - " 71. 
])},\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-244.86325227, -86.3951179 , -85.37054626, -76.94824509,\n", + " -81.44168073, -77.58697262, -33.36149021, -105.36088698,\n", + " -96.24617048, -96.71227961, -93.16172698, -105.29479754,\n", + " -126.53547428, -128.7206711 , -107.23703396, -95.2677292 ,\n", + " -35.4000771 , -9.13614517, -104.2566314 , -11.59143422]),\n", + " 'z': array([32.92787665, 6.05179659, 10.69826999, 33.28526877, 15.57732296,\n", + " 22.10218203, 25.21959222, 41.97317817, 30.7794646 , 28.29451386,\n", + " 13.33886871, 1.68283583, 1.28672757, 0.5135691 , 2.01013613,\n", + " 38.40509519, 28.99504031, 3.47007711, 24.24935571, 7.70797002]),\n", + " 'c': array([31.8, 26.5, 23. , 19.6, 27.3, 23. , 7.2, 20.6, 24.1, 26.7, 27.7,\n", + " 33.3, 29.9, 29.9, 31.7, 19.5, 7.5, 0.1, 24. , 2.1])},\n", " 3: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", - " 8500., 9000., 9500., 10000., 10500., 11000., 11500., 12000.,\n", - " 12500., 13000., 13500., 14000., 14500., 15000., 15500., 16000.,\n", - " 16500., 17000., 17500., 18000., 18500., 19000., 19500., 20000.,\n", - " 20500., 21000., 21500., 22000., 22500., 23000., 23500., 24000.,\n", - " 24500., 25000., 25500., 26000., 26500., 27000., 27500., 28000.,\n", - " 28500., 29000., 29500., 30000., 30500., 31000., 31500., 32000.,\n", - " 32500., 33000., 33500., 34000., 34500., 35000., 35500., 36000.,\n", - " 36500., 37000., 37500., 38000., 38500., 39000., 39500., 40000.,\n", - " 40500., 41000., 41500., 42000., 42500., 43000., 43500., 44000.,\n", - " 44500., 45000., 45500., 46000., 46500., 47000., 47500., 48000.,\n", - " 48500., 49000., 49500., 50000.]),\n", - " 'y': array([-402.87768605, -416.03925652, -397.67433939, -15.51086244,\n", - " -338.74450269, -13.78740854, -67.46660934, -21.78790687,\n", - " -29.73239106, -32.3916248 , -54.77101753, -92.82352469,\n", - " -107.97453387, -9.60859989, -14.76553584, 
-22.88353444,\n", - " -12.82946344, -9.34521305, -132.37672991, -21.05507744,\n", - " -15.84140431, -18.83522117, -19.40723473, -19.54364041,\n", - " -7.09304335, -37.89645659, -4.56799086, -13.06765202,\n", - " -9.07659618, -17.14236606, -19.15115235, -9.32101615,\n", - " -5.29976631, -36.1093172 , -18.30178842, -6.8827166 ,\n", - " -8.89846165, -7.57448302, -13.61864501, -18.04705961,\n", - " -19.02294347, -23.07490903, -18.25020172, -16.16611976,\n", - " -28.15521032, -34.55727866, -12.80221679, -28.29491116,\n", - " -43.0927741 , -28.53239886, -11.11941774, -22.20636851,\n", - " -8.05345016, -13.29283074, -25.64067706, -23.33745374,\n", - " -9.50822568, -9.25335078, -14.12768848, -6.93936013,\n", - " -25.64781529, -25.26731862, -26.42465292, -51.34845922,\n", - " -55.17523907, -41.99262818, -43.06463348, -63.5350736 ,\n", - " -65.10800316, -39.23369191, -31.83457628, -38.7218268 ,\n", - " -25.70734803, -24.22637536, -179.86802223, -5.47959912,\n", - " -7.17286481, -8.74904283, -15.47622047, -13.02475882,\n", - " -20.18156647, -15.31774842, -17.21061661, -17.18823759,\n", - " -27.56809509, -25.49753247, -16.33563442, -26.52646995,\n", - " -38.69693586, -28.86193921, -24.89950026, -45.43547793,\n", - " -82.49761768, -52.29974492, -4.69658362, -22.28091931,\n", - " -30.59838984, -48.22223506, -35.83578102, -15.7452339 ]),\n", - " 'c': array([69.8, 70.4, 50.4, 0.2, 34.7, 0. , 0.8, 0. , 0. , 0. , 1. ,\n", - " 0.9, 0.1, 0. , 0. , 0. , 0. , 0. , 1.4, 0. , 0.2, 0. ,\n", - " 0. , 0.2, 0. , 1.4, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", - " 0.7, 0. , 0. , 0. , 0. , 0.4, 0.9, 0.3, 0.5, 0. , 0.2,\n", - " 0. , 0.3, 0. , 0.7, 0.7, 0.5, 0. , 0. , 0. , 0. , 0.4,\n", - " 0.3, 0.1, 0. , 0. , 0. , 0.5, 0. , 0. , 0. , 0.7, 0. ,\n", - " 0.5, 0.1, 1.5, 0.4, 1. , 0.2, 0.2, 0. , 3.9, 0. , 0. ,\n", - " 0. , 0. , 0.1, 0. , 0. , 0. , 0.5, 0.3, 0.1, 0. , 0.1,\n", - " 0.1, 0.1, 0.1, 0.1, 0. , 0. , 0. , 0.4, 0.4, 0.3, 0.2,\n", - " 0. 
])},\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -88.73759221, -126.00000311, -90.99067143, -80.46410846,\n", + " -12.83645707, -16.03716323, -42.81936977, -13.51917284,\n", + " -24.75945539, -35.83917692, -14.02774955, -9.29914404,\n", + " -10.60575605, -11.95517279, -7.10112706, -7.92139116,\n", + " -19.43059879, -75.15357008, -13.38348433, -7.0985137 ]),\n", + " 'z': array([13.15407607, 52.46704738, 2.18520214, 11.22204933, 12.81667528,\n", + " 7.51305438, 27.89587958, 13.54602554, 23.6786912 , 37.86153515,\n", + " 14.33454502, 4.06873548, 4.58162264, 8.51931056, 3.7506986 ,\n", + " 1.18807648, 28.95627727, 30.58058651, 24.14060408, 1.41197402]),\n", + " 'c': array([26.2, 26.3, 29.5, 22.7, 1.8, 3.5, 11.7, 1.7, 7.5, 7.8, 2.8,\n", + " 0.8, 1.3, 3.6, 0.9, 0. , 4.4, 21.7, 2.4, 0. ])},\n", " 4: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", - " 8500., 9000., 9500., 10000., 10500., 11000., 11500., 12000.,\n", - " 12500., 13000., 13500., 14000., 14500., 15000., 15500., 16000.,\n", - " 16500., 17000., 17500., 18000., 18500., 19000., 19500., 20000.,\n", - " 20500., 21000., 21500., 22000., 22500., 23000., 23500., 24000.,\n", - " 24500., 25000., 25500., 26000., 26500., 27000., 27500., 28000.,\n", - " 28500., 29000., 29500., 30000., 30500., 31000., 31500., 32000.,\n", - " 32500., 33000., 33500., 34000., 34500., 35000., 35500., 36000.,\n", - " 36500., 37000., 37500., 38000., 38500., 39000., 39500., 40000.,\n", - " 40500., 41000., 41500., 42000., 42500., 43000., 43500., 44000.,\n", - " 44500., 45000., 45500., 46000., 46500., 47000., 47500., 48000.,\n", - " 48500., 49000., 49500., 50000.]),\n", - " 'y': array([-4.57500509e+02, -4.77922800e+02, -7.96641101e+01, -2.84932400e+02,\n", - " -1.09807566e+02, -6.53430145e+00, -5.41508352e+00, -4.98000493e+00,\n", - " -6.74103101e+00, -7.40928002e+00, -7.43599825e+00, -1.24499965e+01,\n", - " -1.17793271e+01, -5.01812639e+00, 
-5.90506680e+00, -7.71943377e+00,\n", - " -4.07006721e+00, -3.69255045e+00, -1.08510502e+01, -4.11559234e+00,\n", - " -3.85794991e+00, -1.50870028e+01, -5.76964481e+00, -4.79259043e+00,\n", - " -7.02187923e+00, -4.91691786e+00, -4.03516720e+00, -3.73672692e+00,\n", - " -7.17760023e+00, -3.51616247e+00, -3.45872154e+00, -4.91154067e+00,\n", - " -4.40776768e+00, -3.77920118e+00, -5.27338228e+00, -3.82112310e+00,\n", - " -3.46206971e+00, -4.37610729e+00, -4.76651356e+00, -5.09622795e+00,\n", - " -3.85846420e+00, -4.80509717e+00, -1.39312864e+02, -6.80146920e+00,\n", - " -6.35461635e+00, -4.93699939e+00, -3.24652009e+00, -1.30364774e+01,\n", - " -4.53752632e+00, -8.93677593e+00, -3.93623019e+00, -5.97454732e+00,\n", - " -1.11628673e+01, -8.55298337e+00, -3.87388258e+00, -5.23504896e+02,\n", - " -6.00014427e+04, -1.06109734e+03, -2.33565026e+02, -5.45415616e+02,\n", - " -7.04119988e+00, -3.64701379e+00, -4.14990241e+00, -1.04589259e+01,\n", - " -7.42013209e+00, -2.48328808e+00, -4.96037687e+00, -3.65201618e+00,\n", - " -4.37270843e+00, -5.32763222e+00, -8.32588126e+00, -1.14359864e+01,\n", - " -6.07596597e+00, -1.32472316e+01, -5.13856728e+00, -3.80266858e+00,\n", - " -5.04807738e+00, -6.82508155e+00, -3.51097630e+00, -3.21814344e+00,\n", - " -3.20756772e+00, -7.15586469e+00, -3.38546545e+00, -3.73316371e+00,\n", - " -4.06561456e+00, -6.12323427e+00, -4.45902448e+00, -2.23620498e+01,\n", - " -1.11402504e+01, -8.76936709e+00, -3.38528020e+00, -3.54128248e+00,\n", - " -1.50799442e+01, -5.18014918e+00, -5.46999037e+00, -5.55881441e+00,\n", - " -7.65469989e+00, -4.56037432e+00, -2.37177098e+01, -6.43359233e+00]),\n", - " 'c': array([65.6, 75. , 34.3, 49.2, 28.6, 2. , 3. , 0. , 0. , 0. , 0. ,\n", - " 0. , 5.3, 0. , 0. , 0. , 0. , 0. , 0. , 0.1, 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0.2, 0. , 0. , 0. , 1.4, 0. , 0. ,\n", - " 0.7, 0. , 0.3, 0. , 0. , 0. , 0. , 0. , 0. , 19.1, 0. ,\n", - " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,\n", - " 67.8, 75. , 75. 
, 34.7, 52.5, 3.9, 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0.2, 0. , 0. , 0. , 0. , 0. , 0. , 0.3, 0.2,\n", - " 0. , 0.2, 0.1, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 4.9,\n", - " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.6, 0. , 0. , 0.7,\n", - " 0.1])}}}" + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -57.85865149, -74.11694082, -70.19026064, -84.18831636,\n", + " -83.53374019, -71.85111259, -57.51445245, -84.49066923,\n", + " -72.5572182 , -79.28883464, -45.83532699, -129.85670736,\n", + " -313.31889514, -19.69863616, -9.36634841, -25.68620527,\n", + " -11.51693558, -9.62308996, -30.12383728, -12.26404949]),\n", + " 'z': array([30.18438732, 29.870457 , 29.04197513, 3.22187512, 25.36574105,\n", + " 37.05351538, 36.13086451, 36.08591068, 49.98065166, 54.27231097,\n", + " 28.48041137, 23.18674818, 41.32047561, 13.10132903, 3.27045503,\n", + " 3.49000542, 7.35901433, 5.1008803 , 14.51229556, 10.6276242 ]),\n", + " 'c': array([16.2, 22.1, 22.9, 28.2, 24. , 16.2, 14.4, 20.1, 16.9, 16.1, 12.8,\n", + " 22.9, 23. , 1.5, 0. , 0. , 0. , 0. , 0. , 0. ])},\n", + " 5: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -95.77587374, -85.33406005, -114.33498087, -112.34155807,\n", + " -101.60911145, -133.99463064, -8.74047116, -115.08657042,\n", + " -17.69012209, -103.04353705, -122.925984 , -78.92432405,\n", + " -101.91064899, -99.68935924, -30.05280358, -94.6588618 ,\n", + " -31.63711694, -14.87276245, -14.76882662, -21.25790984]),\n", + " 'z': array([15.75878618, 14.32290751, 30.06350849, 35.90840224, 6.94466408,\n", + " 33.31356661, 3.73396229, 39.76046643, 6.17335928, 45.46260125,\n", + " 14.57370589, 46.95368372, 19.03904578, 8.04376882, 22.22837657,\n", + " 3.83988157, 33.5729415 , 12.50391111, 0.96691551, 4.3192503 ]),\n", + " 'c': array([24.6, 23.4, 23.4, 25. , 29.4, 27.7, 0. , 21.5, 0. 
, 14.3, 17.3,\n", + " 17.3, 25.6, 26.8, 4.8, 26.8, 6.8, 3.3, 0. , 1.9])},\n", + " 6: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-121.51290919, -92.26852823, -21.4798191 , -78.72161812,\n", + " -92.70530608, -57.91764719, -98.39567541, -67.57846499,\n", + " -53.85569133, -54.57893978, -18.58912415, -31.86498026,\n", + " -25.25531387, -54.93528158, -16.70189186, -18.7636848 ,\n", + " -11.63996589, -15.92818131, -14.74570672, -21.61254021]),\n", + " 'z': array([25.29275722, 20.45706251, 11.31938166, 27.18819152, 6.56093936,\n", + " 35.39044708, 18.98274941, 25.62347078, 27.17950953, 33.98969634,\n", + " 8.61352965, 18.7021114 , 16.42592361, 22.47267129, 7.07717974,\n", + " 4.18982177, 2.08890093, 7.82016317, 8.789092 , 7.12586545]),\n", + " 'c': array([26.4, 21. , 5.6, 20.1, 25.6, 13.9, 23.1, 17.1, 13.1, 11.5, 1.9,\n", + " 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.6, 2.2])},\n", + " 7: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-108.38387466, -98.20150427, -34.85556526, -42.10022212,\n", + " -65.56799993, -11.34277597, -28.18381525, -10.79434242,\n", + " -7.90888795, -12.71270688, -21.11374718, -103.85949379,\n", + " -51.93365982, -152.8030517 , -175.08409233, -145.15263834,\n", + " -115.76310848, -102.18559016, -27.85745691, -13.47415484]),\n", + " 'z': array([21.44585732, 6.34545397, 14.20572753, 36.76684962, 25.82404116,\n", + " 6.43910912, 25.48781057, 4.73920333, 2.30441533, 2.37860444,\n", + " 20.16364963, 76.89157171, 56.37863798, 9.83543846, 13.79390946,\n", + " 10.22818282, 6.37749184, 3.97117169, 5.19507364, 2.85361773]),\n", + " 'c': array([29.5, 26.2, 1.2, 7.1, 16. , 0.7, 5.1, 2. , 0. , 0. , 4.7,\n", + " 14.4, 11. , 38. , 40.8, 40.2, 34.4, 35.4, 0. , 0. 
])},\n", + " 8: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([-166.43640934, -174.6512315 , -81.928213 , -261.60196128,\n", + " -135.05830111, -152.05504637, -295.16612293, -238.92046484,\n", + " -136.93420936, -147.83613596, -242.14531438, -114.41628891,\n", + " -28.59287696, -37.56466098, -19.71274223, -69.81461789,\n", + " -33.5928915 , -178.88189746, -114.03204541, -49.9306826 ]),\n", + " 'z': array([98.32053454, 59.11481943, 26.0411492 , 74.75096028, 0.3754464 ,\n", + " 3.6909833 , 18.00529347, 29.42848687, 16.84944035, 22.69305223,\n", + " 34.00426581, 10.29195785, 24.08533589, 26.63153401, 9.93230311,\n", + " 23.82371447, 18.06219468, 30.78322225, 43.88140579, 45.76903533]),\n", + " 'c': array([24.3, 24.2, 21.1, 24.8, 39.7, 38.2, 37.7, 33.9, 27.2, 24.4, 30.2,\n", + " 29.1, 7. , 9.3, 3.5, 15.6, 6.2, 28.9, 20.4, 8.5])},\n", + " 9: {'x': array([ 500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,\n", + " 4500., 5000., 5500., 6000., 6500., 7000., 7500., 8000.,\n", + " 8500., 9000., 9500., 10000.]),\n", + " 'y': array([ -98.59481682, -76.35499153, -82.34566714, -97.722363 ,\n", + " -55.73676181, -62.23786681, -107.983981 , -123.65415846,\n", + " -60.05662397, -32.24639699, -45.29479073, -17.22421801,\n", + " -10.93949224, -51.29447895, -7.56834087, -6.51862324,\n", + " -5.432557 , -4.93144591, -8.47881117, -7.1310802 ]),\n", + " 'z': array([ 9.15574628, 18.6260246 , 20.07091993, 40.44450302, 26.40887359,\n", + " 34.10840208, 8.73629036, 11.55301326, 39.18273304, 11.03148291,\n", + " 34.53789721, 4.92900077, 5.66695823, 36.4908701 , 2.27143873,\n", + " 1.85179411, 1.44159655, 1.45038918, 2.62394397, 2.13812183]),\n", + " 'c': array([27.6, 24.5, 21.3, 21.5, 14.2, 15.8, 29.6, 26.1, 16.7, 6.5, 11.1,\n", + " 0. , 1.8, 11.2, 0. , 0. , 0. , 0. , 0. , 0. 
])}}}" ] }, - "execution_count": 51, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -642,7 +653,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -655,12 +666,44 @@ "2\n", "3\n", "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n", "sac\n", "0\n", "1\n", "2\n", "3\n", - "4\n" + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n", + "td3\n", + "0\n", + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n", + "ddpg\n", + "0\n", + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n" ] }, { @@ -669,13 +712,13 @@ "Text(0.5, 1.0, 'Task: Cartpole')" ] }, - "execution_count": 55, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -686,7 +729,7 @@ ], "source": [ "eval_data = {}\n", - "w = 5\n", + "w = 1\n", "fig = plt.figure()\n", "for method in data_paths.keys():\n", " print(method)\n", @@ -695,16 +738,17 @@ " print(seed)\n", " temp[seed, 0, :] = perf_data[method][seed][\"x\"]\n", " temp[seed, 1, :] = perf_data[method][seed][\"y\"]\n", - " # temp[seed, 2, :] = perf_data[method][seed][\"z\"]\n", + " temp[seed, 2, :] = perf_data[method][seed][\"z\"]\n", " temp[seed, 3, :] = perf_data[method][seed][\"c\"]\n", " eval_data.update({method: temp})\n", "\n", " # plotting performance\n", - " # plt.plot(temp[0,0,:], -moving_average(np.mean(temp[:,1,:], axis=0),w)[w-1:], label=method)\n", - " # plt.fill_between(temp[0,0,:], np.mean(temp[:,1,:], axis=0)+np.mean(temp[:,2,:], axis=0)**.5, np.mean(temp[:,1,:], axis=0)-np.mean(temp[:,2,:], axis=0)**0.5, alpha=0.25)\n", + " plt.plot(temp[0,0,:], np.mean(temp[:,1,:], axis=0), label=method)\n", + " plt.fill_between(temp[0,0,:], np.mean(temp[:,1,:], axis=0)+np.mean(temp[:,2,:], axis=0), \n", + " np.mean(temp[:,1,:], axis=0)-np.mean(temp[:,2,:], axis=0), alpha=0.25)\n", "\n", " # plotting constraint violations\n", - " plt.plot(temp[0,0,:], np.mean(temp[:,3,:], axis=0), label=method)\n", + " # plt.plot(temp[0,0,:], np.mean(temp[:,3,:], axis=0), label=method)\n", "\n", "# gp_05 = np.load(os.getcwd() + \"/gp_mpc_data/gp_mpc_M_0.5_cost.npy\", allow_pickle=True)\n", "# gp_10 = np.load(os.getcwd() + \"/gp_mpc_data/gp_mpc_M_1.0_cost.npy\", allow_pickle=True)\n", @@ -717,16 +761,16 @@ "# plt.plot(gp_30.item()[\"mean\"][:,0], gp_30.item()[\"mean\"][:,1], label=\"GP-MPC (m=3.0)\")\n", "# plt.fill_between(gp_30.item()[\"mean\"][:,0], gp_30.item()[\"mean\"][:,1]-gp_30.item()[\"std\"], gp_30.item()[\"mean\"][:,1]+gp_30.item()[\"std\"], alpha=0.25)\n", "\n", - "gp_05 = np.load(os.getcwd() + \"/gp_mpc_data/gp_mpc_M_0.5_constraint_percentage.npy\", allow_pickle=True)\n", - "gp_10 = np.load(os.getcwd() + \"/gp_mpc_data/gp_mpc_M_1.0_constraint_percentage.npy\", 
allow_pickle=True)\n", - "gp_30 = np.load(os.getcwd() + \"/gp_mpc_data/gp_mpc_M_3.0_constraint_percentage.npy\", allow_pickle=True)\n", - "plt.plot(gp_05.item()[\"mean\"][:,0], gp_05.item()[\"mean\"][:,1], label=\"GP-MPC (m=0.5)\")\n", - "plt.plot(gp_10.item()[\"mean\"][:,0], gp_10.item()[\"mean\"][:,1], label=\"GP-MPC (m=1.0)\")\n", - "plt.plot(gp_30.item()[\"mean\"][:,0], gp_30.item()[\"mean\"][:,1], label=\"GP-MPC (m=3.0)\")\n", + "# gp_05 = np.load(os.getcwd() + \"/gp_mpc_data/gp_mpc_M_0.5_constraint_percentage.npy\", allow_pickle=True)\n", + "# gp_10 = np.load(os.getcwd() + \"/gp_mpc_data/gp_mpc_M_1.0_constraint_percentage.npy\", allow_pickle=True)\n", + "# gp_30 = np.load(os.getcwd() + \"/gp_mpc_data/gp_mpc_M_3.0_constraint_percentage.npy\", allow_pickle=True)\n", + "# plt.plot(gp_05.item()[\"mean\"][:,0], gp_05.item()[\"mean\"][:,1], label=\"GP-MPC (m=0.5)\")\n", + "# plt.plot(gp_10.item()[\"mean\"][:,0], gp_10.item()[\"mean\"][:,1], label=\"GP-MPC (m=1.0)\")\n", + "# plt.plot(gp_30.item()[\"mean\"][:,0], gp_30.item()[\"mean\"][:,1], label=\"GP-MPC (m=3.0)\")\n", "\n", "\n", "plt.legend()\n", - "plt.ylim(0,100)\n", + "plt.ylim(-200,0)\n", "plt.xscale(\"log\")\n", "plt.xlabel(\"Training steps\")\n", "plt.ylabel(\"% Constraint violation\")\n", diff --git a/examples/rl/train_rl_model.sh b/examples/rl/train_rl_model.sh index 712b2db17..8c4d3155d 100755 --- a/examples/rl/train_rl_model.sh +++ b/examples/rl/train_rl_model.sh @@ -1,19 +1,19 @@ #!/bin/bash -#SYS='cartpole' -# SYS='quadrotor_2D' -SYS='quadrotor_2D_attitude' -# SYS='quadrotor_3D' +SYS='cartpole' +#SYS='quadrotor_2D' +#SYS='quadrotor_2D_attitude' +#SYS='quadrotor_3D' -# TASK='stab' -TASK='track' +TASK='stab' +#TASK='track' -ALGO='ppo' -# ALGO='sac' +#ALGO='ppo' +#ALGO='sac' #ALGO='td3' -# ALGO='ddpg' +ALGO='ddpg' -# ALGO='safe_explorer_ppo' +#ALGO='safe_explorer_ppo' if [ "$SYS" == 'cartpole' ]; then SYS_NAME=$SYS @@ -45,7 +45,7 @@ if [ "$ALGO" == 'safe_explorer_ppo' ]; then fi # Train the unsafe 
controller/agent. -for SEED in {0..0} +for SEED in {0..9} do python3 ../../safe_control_gym/experiments/train_rl_controller.py \ --algo ${ALGO} \ diff --git a/examples/rlmpc/config_overrides/cartpole/qlearning_mpc_cartpole.yaml b/examples/rlmpc/config_overrides/cartpole/qlearning_mpc_cartpole.yaml index 8b1378917..e69de29bb 100644 --- a/examples/rlmpc/config_overrides/cartpole/qlearning_mpc_cartpole.yaml +++ b/examples/rlmpc/config_overrides/cartpole/qlearning_mpc_cartpole.yaml @@ -1 +0,0 @@ - diff --git a/examples/rlmpc/rlmpc_experiment.py b/examples/rlmpc/rlmpc_experiment.py index d6b39932f..bf605003d 100644 --- a/examples/rlmpc/rlmpc_experiment.py +++ b/examples/rlmpc/rlmpc_experiment.py @@ -46,7 +46,6 @@ def run(plot=True, training=False, n_episodes=1, n_steps=None, curr_path='.'): results, uncert_metrics = experiment.run_evaluation(n_episodes=n_episodes, n_steps=n_steps) elapsed_time_uncert = results['timestamp'][0][-1] - results['timestamp'][0][0] - if __name__ == '__main__': run() diff --git a/examples/rlmpc/rlmpc_experiment.sh b/examples/rlmpc/rlmpc_experiment.sh index b63aac140..337e31cce 100755 --- a/examples/rlmpc/rlmpc_experiment.sh +++ b/examples/rlmpc/rlmpc_experiment.sh @@ -19,4 +19,4 @@ python3 ./rlmpc_experiment.py \ --task ${SYS_NAME} \ --algo ${ALGO} \ --overrides \ - ./config_overrides/${SYS}/${SYS}_${TASK}.yaml + ./config_overrides/${SYS}/${SYS}_${TASK}.yaml diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..431afb635 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +munch~=2.5.0 +pytest~=7.4.4 +matplotlib~=3.9.0 +numpy~=1.26.4 +pyyaml~=6.0.1 +pybullet~=3.2.6 +setuptools~=69.5.1 +casadi~=3.6.5 +gymnasium~=0.28.1 +torch~=1.13.1 +scipy~=1.13.1 +imageio~=2.34.1 +tensorboard~=2.16.2 +termcolor~=1.1.0 +gpytorch~=1.11 +scikit-learn~=1.5.0 +cvxpy~=1.5.1 +pytope~=0.0.4 +optuna~=3.6.1 +mysql-connector-python~=8.0.33 diff --git a/safe_control_gym/controllers/__init__.py b/safe_control_gym/controllers/__init__.py 
index e3b500681..2897d9453 100644 --- a/safe_control_gym/controllers/__init__.py +++ b/safe_control_gym/controllers/__init__.py @@ -34,6 +34,10 @@ entry_point='safe_control_gym.controllers.sac.sac:SAC', config_entry_point='safe_control_gym.controllers.sac:sac.yaml') +register(idx='td3', + entry_point='safe_control_gym.controllers.td3.td3:TD3', + config_entry_point='safe_control_gym.controllers.td3:td3.yaml') + register(idx='ddpg', entry_point='safe_control_gym.controllers.ddpg.ddpg:DDPG', config_entry_point='safe_control_gym.controllers.ddpg:ddpg.yaml') @@ -55,20 +59,17 @@ config_entry_point='safe_control_gym.controllers.mpc:sqp_mpc.yaml') register(idx='sqp_gp_mpc', - entry_point='safe_control_gym.controllers.mpc.sqp_gp_mpc:SQPGPMPC', - config_entry_point='safe_control_gym.controllers.mpc:sqp_gp_mpc.yaml') + entry_point='safe_control_gym.controllers.mpc.sqp_gp_mpc:SQPGPMPC', + config_entry_point='safe_control_gym.controllers.mpc:sqp_gp_mpc.yaml') register(idx='mpc_acados', - entry_point='safe_control_gym.controllers.mpc.mpc_acados:MPC_ACADOS', - config_entry_point='safe_control_gym.controllers.mpc:mpc_acados.yaml') + entry_point='safe_control_gym.controllers.mpc.mpc_acados:MPC_ACADOS', + config_entry_point='safe_control_gym.controllers.mpc:mpc_acados.yaml') register(idx='gpmpc_acados', - entry_point='safe_control_gym.controllers.mpc.gpmpc_acados:GPMPC_ACADOS', - config_entry_point='safe_control_gym.controllers.mpc:gpmpc_acados.yaml') + entry_point='safe_control_gym.controllers.mpc.gpmpc_acados:GPMPC_ACADOS', + config_entry_point='safe_control_gym.controllers.mpc:gpmpc_acados.yaml') + register(idx='qlearning_mpc', entry_point='safe_control_gym.controllers.mpc.qlearning_mpc:Qlearning_MPC', config_entry_point='safe_control_gym.controllers.mpc:qlearning_mpc.yaml') - -register(idx='td3', - entry_point='safe_control_gym.controllers.td3.td3:TD3', - config_entry_point='safe_control_gym.controllers.td3:td3.yaml') diff --git a/safe_control_gym/controllers/ddpg/ddpg.py 
b/safe_control_gym/controllers/ddpg/ddpg.py index 33e9605cf..eb5b89edc 100644 --- a/safe_control_gym/controllers/ddpg/ddpg.py +++ b/safe_control_gym/controllers/ddpg/ddpg.py @@ -1,10 +1,10 @@ -'''Deep Deterministic Policy Gradient +"""Deep Deterministic Policy Gradient Reference paper & code: * [Continuous Control with Deep Reinforcement Learning](https://arxiv.org/pdf/1509.02971.pdf) * [openai spinning up - ddpg](https://github.com/openai/spinningup/tree/master/spinup/algos/pytorch/ddpg) * [DeepRL - ddpg](https://github.com/ShangtongZhang/DeepRL/blob/master/deep_rl/agent/DDPG_agent.py) -''' +""" import os import time @@ -26,7 +26,7 @@ class DDPG(BaseController): - '''Deep Deterministic Policy Gradient.''' + """Deep Deterministic Policy Gradient.""" def __init__(self, env_func, @@ -85,7 +85,7 @@ def __init__(self, self.logger = ExperimentLogger(output_dir, log_file_out=log_file_out, use_tensorboard=use_tensorboard) def reset(self): - '''Prepares for training or testing.''' + """Prepares for training or testing.""" if self.training: # set up stats tracking self.env.add_tracker('constraint_violation', 0) @@ -107,14 +107,14 @@ def reset(self): self.env.add_tracker('mse', 0, mode='queue') def close(self): - '''Shuts down and cleans up lingering resources.''' + """Shuts down and cleans up lingering resources.""" self.env.close() if self.training: self.eval_env.close() self.logger.close() def save(self, path, save_buffer=True): - '''Saves model params and experiment state to checkpoint path.''' + """Saves model params and experiment state to checkpoint path.""" path_dir = os.path.dirname(path) os.makedirs(path_dir, exist_ok=True) @@ -141,7 +141,7 @@ def save(self, path, save_buffer=True): torch.save(state_dict, path) def load(self, path): - '''Restores model and experiment given checkpoint path.''' + """Restores model and experiment given checkpoint path.""" state = torch.load(path) # restore params @@ -162,7 +162,7 @@ def load(self, path): 
self.logger.load(self.total_steps) def learn(self, env=None, **kwargs): - '''Performs learning (pre-training, training, fine-tuning, etc).''' + """Performs learning (pre-training, training, fine-tuning, etc.).""" if self.num_checkpoints > 0: step_interval = np.linspace(0, self.max_env_steps, self.num_checkpoints) interval_save = np.zeros_like(step_interval, dtype=bool) @@ -204,7 +204,7 @@ def learn(self, env=None, **kwargs): self.log_step(results) def select_action(self, obs, info=None): - '''Determine the action to take at the current timestep. + """Determine the action to take at the current timestep. Args: obs (ndarray): The observation at this timestep. @@ -212,7 +212,7 @@ def select_action(self, obs, info=None): Returns: action (ndarray): The action chosen by the controller. - ''' + """ with torch.no_grad(): obs = torch.FloatTensor(obs).to(self.device) @@ -221,7 +221,7 @@ def select_action(self, obs, info=None): return action def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): - '''Runs evaluation with current policy.''' + """Runs evaluation with current policy.""" self.agent.eval() self.obs_normalizer.set_read_only() if env is None: @@ -269,7 +269,7 @@ def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): return eval_results def train_step(self, **kwargs): - '''Performs a training step.''' + """Performs a training step.""" self.agent.train() self.obs_normalizer.unset_read_only() obs = self.obs @@ -341,7 +341,7 @@ def train_step(self, **kwargs): return results def log_step(self, results): - '''Does logging after a training step.''' + """Does logging after a training step.""" step = results['step'] # runner stats self.logger.add_scalars( @@ -371,6 +371,7 @@ def log_step(self, results): { 'ep_length': ep_lengths.mean(), 'ep_return': ep_returns.mean(), + 'ep_return_std': ep_returns.std(), 'ep_reward': (ep_returns / ep_lengths).mean(), 'ep_constraint_violation': ep_constraint_violation.mean() }, @@ -390,6 +391,7 
@@ def log_step(self, results): { 'ep_length': eval_ep_lengths.mean(), 'ep_return': eval_ep_returns.mean(), + 'ep_return_std': eval_ep_returns.std(), 'ep_reward': (eval_ep_returns / eval_ep_lengths).mean(), 'constraint_violation': eval_constraint_violation.mean(), 'mse': eval_mse.mean() diff --git a/safe_control_gym/controllers/ddpg/ddpg_utils.py b/safe_control_gym/controllers/ddpg/ddpg_utils.py index ea8a98a7d..705d69691 100644 --- a/safe_control_gym/controllers/ddpg/ddpg_utils.py +++ b/safe_control_gym/controllers/ddpg/ddpg_utils.py @@ -7,6 +7,8 @@ from safe_control_gym.controllers.sac.sac_utils import SACBuffer, soft_update from safe_control_gym.math_and_models.neural_networks import MLP +from safe_control_gym.math_and_models.random_processes import OrnsteinUhlenbeckProcess +from safe_control_gym.math_and_models.schedule import LinearSchedule # ----------------------------------------------------------------------------------- # Agent @@ -14,7 +16,7 @@ class DDPGAgent: - '''A DDPG class that encapsulates model, optimizer and update functions.''' + """A DDPG class that encapsulates model, optimizer and update functions.""" def __init__(self, obs_space, @@ -46,20 +48,20 @@ def __init__(self, self.critic_opt = torch.optim.Adam(self.ac.q.parameters(), critic_lr) def to(self, device): - '''Puts agent to device.''' + """Puts agent to device.""" self.ac.to(device) self.ac_targ.to(device) def train(self): - '''Sets training mode.''' + """Sets training mode.""" self.ac.train() def eval(self): - '''Sets evaluation mode.''' + """Sets evaluation mode.""" self.ac.eval() def state_dict(self): - '''Snapshots agent state.''' + """Snapshots agent state.""" return { 'ac': self.ac.state_dict(), 'ac_targ': self.ac_targ.state_dict(), @@ -68,14 +70,14 @@ def state_dict(self): } def load_state_dict(self, state_dict): - '''Restores agent state.''' + """Restores agent state.""" self.ac.load_state_dict(state_dict['ac']) self.ac_targ.load_state_dict(state_dict['ac_targ']) 
self.actor_opt.load_state_dict(state_dict['actor_opt']) self.critic_opt.load_state_dict(state_dict['critic_opt']) def compute_policy_loss(self, batch): - '''Returns policy loss(es) given batch of data.''' + """Returns policy loss(es) given batch of data.""" obs = batch['obs'] act = self.ac.actor(obs) q = self.ac.q(obs, act) @@ -83,7 +85,7 @@ def compute_policy_loss(self, batch): return policy_loss def compute_q_loss(self, batch): - '''Returns q-value loss(es) given batch of data.''' + """Returns q-value loss(es) given batch of data.""" obs, act, rew, next_obs, mask = batch['obs'], batch['act'], batch['rew'], batch['next_obs'], batch['mask'] q = self.ac.q(obs, act) @@ -97,7 +99,7 @@ def compute_q_loss(self, batch): return critic_loss def update(self, batch): - '''Updates model parameters based on current training batch.''' + """Updates model parameters based on current training batch.""" results = defaultdict(list) # actor update @@ -150,7 +152,7 @@ def forward(self, obs, act): class MLPActorCritic(nn.Module): - '''Model for the actor-critic agent.''' + """Model for the actor-critic agent.""" def __init__(self, obs_space, act_space, hidden_dims=(64, 64), activation='relu'): super().__init__() @@ -180,14 +182,14 @@ def act(self, obs, **kwargs): # ----------------------------------------------------------------------------------- class DDPGBuffer(SACBuffer): - '''Storage for replay buffer during training. + """Storage for replay buffer during training. Attributes: max_size (int): maximum size of the replay buffer. batch_size (int): number of samples (steps) per batch. - scheme (dict): describs shape & other info of data to be stored. + scheme (dict): describes shape & other info of data to be stored. keys (list): names of all data from scheme. 
- ''' + """ def __init__(self, obs_space, act_space, max_size, batch_size=None): self.max_size = max_size @@ -224,7 +226,7 @@ def __init__(self, obs_space, act_space, max_size, batch_size=None): # ----------------------------------------------------------------------------------- def make_action_noise_process(noise_config, act_space): - '''Construct a process for generating action noise during agent training.''' + """Construct a process for generating action noise during agent training.""" process_func = noise_config.pop('func') std_config = noise_config.pop('std') diff --git a/safe_control_gym/controllers/lqr/lqr_utils.py b/safe_control_gym/controllers/lqr/lqr_utils.py index 939f068a5..9bcd5919e 100644 --- a/safe_control_gym/controllers/lqr/lqr_utils.py +++ b/safe_control_gym/controllers/lqr/lqr_utils.py @@ -91,4 +91,4 @@ def get_cost_weight_matrix(weights, dim): W = np.diag(weights * dim) else: raise Exception('Wrong dimension for cost weights.') - return W \ No newline at end of file + return W diff --git a/safe_control_gym/controllers/mpc/gp_mpc.py b/safe_control_gym/controllers/mpc/gp_mpc.py index f30de4099..143e63e02 100644 --- a/safe_control_gym/controllers/mpc/gp_mpc.py +++ b/safe_control_gym/controllers/mpc/gp_mpc.py @@ -16,7 +16,8 @@ and the inducing points are the previous MPC solution. 3. Each dimension of the learned error dynamics is an independent Zero Mean SE Kernel GP. 
''' -import time, os +import os +import time from copy import deepcopy from functools import partial @@ -30,10 +31,10 @@ from sklearn.model_selection import train_test_split from skopt.sampler import Lhs +from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system from safe_control_gym.controllers.mpc.gp_utils import (GaussianProcessCollection, ZeroMeanIndependentGPModel, covMatern52ard, covSEard, kmeans_centriods) from safe_control_gym.controllers.mpc.linear_mpc import MPC, LinearMPC -from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system from safe_control_gym.envs.benchmark_env import Task @@ -311,7 +312,7 @@ def precompute_probabilistic_limits(self, if self.x_prev is not None and self.u_prev is not None: # cov_x = np.zeros((nx, nx)) cov_x = np.diag([self.initial_rollout_std**2] * nx) - z_batch = np.hstack((self.x_prev[:, :-1].T, self.u_prev.T)) # (T, input_dim) + z_batch = np.hstack((self.x_prev[:, :-1].T, self.u_prev.T)) # (T, input_dim) # Compute the covariance of the dynamics at each time step. time_before = time.time() _, cov_d_tensor_batch = self.gaussian_process.predict(z_batch, return_pred=False) @@ -331,7 +332,7 @@ def precompute_probabilistic_limits(self, # TODO: Addition of noise here! 
And do we still need initial_rollout_std # _, cov_d_tensor = self.gaussian_process.predict(z[None, :], return_pred=False) # cov_d = cov_d_tensor.detach().numpy() - if False: # if self.sparse_gp: + if False: # if self.sparse_gp: dim_gp_outputs = len(self.target_mask) cov_d = np.zeros((dim_gp_outputs, dim_gp_outputs)) K_z_z = self.gaussian_process.kernel(torch.from_numpy(z[None, self.input_mask]).double()).detach().numpy() @@ -339,9 +340,9 @@ def precompute_probabilistic_limits(self, torch.tensor(z_ind).double()).detach().numpy() for i in range(dim_gp_outputs): Q_z_z = K_z_zind[i, :, :] @ K_zind_zind_inv[i, :, :] @ K_z_zind[i, :, :].T - cov_d[i, i] = K_z_z[i, 0] - Q_z_z +\ + cov_d[i, i] = K_z_z[i, 0] - Q_z_z +\ self.K_z_zind_func(z1=z, z2=z_ind)['K'][i, :].toarray() @ Sigma_inv[i] @ self.K_z_zind_func(z1=z, z2=z_ind)['K'][i, :].T.toarray() - else: + else: cov_d = cov_d_batch[i, :, :] _, _, cov_noise, _ = self.gaussian_process.get_hyperparameters() cov_d = cov_d + np.diag(cov_noise.detach().numpy()) @@ -453,7 +454,7 @@ def setup_gp_optimizer(self, n_ind_points, solver='ipopt'): Args: n_ind_points (int): Number of inducing points. ''' - print(f'Setting up GP MPC with {solver} solver.') + print(f'Setting up GP MPC with {solver} solver.') nx, nu = self.model.nx, self.model.nu T = self.T # Define optimizer and variables. @@ -574,7 +575,7 @@ def setup_gp_optimizer(self, n_ind_points, solver='ipopt'): # 'print_time': 1, # 'expand': True, # 'verbose': True} - opts = {'expand': True,} + opts = {'expand': True, } # opti.solver('ipopt', opts) opti.solver(solver, opts) self.opti_dict = { @@ -909,14 +910,14 @@ def load(self, model_path): '''Loads a pretrained batch GP model. Args: model_path (str): Path to the pretrained model. 
''' - + if not self.parallel: raise ValueError('load function only works with parallel GP models.') data = np.load(f'{model_path}/data.npz') gp_model_path = f'{model_path}/best_model.pth' self.train_gp(input_data=data['data_inputs'], target_data=data['data_targets'], gp_model=gp_model_path) print('================== GP models loaded. =================') - + def learn(self, env=None): '''Performs multiple epochs learning. ''' @@ -984,12 +985,12 @@ def learn(self, env=None): train_runs[epoch].update({episode: munch.munchify(run_results)}) lengthscale, outputscale, noise, kern = self.gaussian_process.get_hyperparameters(as_numpy=True) - - # save training data + + # save training data np.savez(os.path.join(self.output_dir, 'data'), - data_inputs=training_results['train_inputs'], - data_targets=training_results['train_targets']) - + data_inputs=training_results['train_inputs'], + data_targets=training_results['train_targets']) + # close environments for env in train_envs: env.close() @@ -1151,7 +1152,7 @@ def compute_initial_guess(self, init_state, goal_states): opti.set_value(mean_post_factor, mean_post_factor_val) opti.set_value(z_ind, z_ind_val) - # Solve the optimization problem. + # Solve the optimization problem. 
try: sol = opti.solve() x_val, u_val = sol.value(x_var), sol.value(u_var) @@ -1168,9 +1169,8 @@ def compute_initial_guess(self, init_state, goal_states): self.x_prev, self.u_prev = x_val, u_val x_guess = x_val u_guess = u_val - + time_after = time.time() print('MPC _compute_initial_guess time: ', time_after - time_before) return x_guess, u_guess - \ No newline at end of file diff --git a/safe_control_gym/controllers/mpc/gp_utils.py b/safe_control_gym/controllers/mpc/gp_utils.py index 7e83d3509..fb67991b6 100644 --- a/safe_control_gym/controllers/mpc/gp_utils.py +++ b/safe_control_gym/controllers/mpc/gp_utils.py @@ -59,6 +59,7 @@ def covMatern52ard(x, r_over_l = ca.sqrt(dist) return sf2 * (1 + ca.sqrt(5) * r_over_l + 5 / 3 * r_over_l ** 2) * ca.exp(- ca.sqrt(5) * r_over_l) + def covMatern52ard(x, z, ell, @@ -80,6 +81,7 @@ def covMatern52ard(x, r_over_l = ca.sqrt(dist) return sf2 * (1 + ca.sqrt(5) * r_over_l + 5 / 3 * r_over_l ** 2) * ca.exp(- ca.sqrt(5) * r_over_l) + class ZeroMeanIndependentMultitaskGPModel(gpytorch.models.ExactGP): '''Multidimensional Gaussian Process model with zero mean function. @@ -110,8 +112,8 @@ def __init__(self, # For constant mean function. 
if kernel == 'RBF': self.covar_module = gpytorch.kernels.ScaleKernel( - gpytorch.kernels.RBFKernel(batch_shape=torch.Size([self.n]), - ard_num_dims=train_x.shape[1]), + gpytorch.kernels.RBFKernel(batch_shape=torch.Size([self.n]), + ard_num_dims=train_x.shape[1]), batch_shape=torch.Size([self.n]), ard_num_dims=train_x.shape[1] ) @@ -119,9 +121,9 @@ def __init__(self, self.covar_module = gpytorch.kernels.ScaleKernel( gpytorch.kernels.MaternKernel(batch_shape=torch.Size([self.n]), ard_num_dims=train_x.shape[1]), - batch_shape=torch.Size([self.n]), - ard_num_dims=train_x.shape[1] - ) + batch_shape=torch.Size([self.n]), + ard_num_dims=train_x.shape[1] + ) else: raise NotImplementedError @@ -313,8 +315,8 @@ def init_with_hyperparam(self, gp_K_plus_noise_inv_list.append(gp.model.K_plus_noise_inv.detach()) print('Loaded!') gp_K_plus_noise = torch.stack(gp_K_plus_noise_list) - gp_K_plus_noise_inv = torch.stack(gp_K_plus_noise_inv_list) - + gp_K_plus_noise_inv = torch.stack(gp_K_plus_noise_inv_list) + self.K_plus_noise = gp_K_plus_noise self.K_plus_noise_inv = gp_K_plus_noise_inv self.casadi_predict = self.make_casadi_predict_func() @@ -424,8 +426,8 @@ def predict(self, Return Predictions means : torch.tensor (N_samples x output DIM). - covs : torch.tensor (N_samples x output DIM x output DIM). - NOTE: For compatibility with the original implementation, + covs : torch.tensor (N_samples x output DIM x output DIM). + NOTE: For compatibility with the original implementation, the output will be squeezed when N_samples == 1. 
''' num_batch = x.shape[0] @@ -502,12 +504,12 @@ def make_casadi_linearized_predict_func(self): dmu[:, gp_ind] = gp.casadi_linearized_predict(z=z)['mean'] A, B = dmu.T[:, :Ny], dmu.T[:, Ny:] assert A.shape == (Ny, Ny), ValueError('A matrix has wrong shape.') - assert B.shape == (Ny, Nz-Ny), ValueError('B matrix has wrong shape.') + assert B.shape == (Ny, Nz - Ny), ValueError('B matrix has wrong shape.') casadi_lineaized_predict = ca.Function('linearized_pred', - [z], - [dmu, A, B], - ['z'], - ['mean', 'A', 'B']) + [z], + [dmu, A, B], + ['z'], + ['mean', 'A', 'B']) return casadi_lineaized_predict def prediction_jacobian(self, @@ -788,8 +790,8 @@ def predict(self, Returns: Predictions means : torch.tensor (N_samples x output DIM). - covs : torch.tensor (N_samples x output DIM x output DIM). - NOTE: For compatibility with the original implementation, + covs : torch.tensor (N_samples x output DIM x output DIM). + NOTE: For compatibility with the original implementation, the output will be squeezed when N_samples == 1. ''' @@ -1063,7 +1065,6 @@ def train(self, self.casadi_linearized_predict = \ self.make_casadi_linearized_prediction_func(train_x, train_y) - def predict(self, x, requires_grad=False, @@ -1135,7 +1136,7 @@ def make_casadi_prediction_func(self, train_inputs, train_targets): ['z'], ['mean']) return predict - + # def make_se_kernel_derivative_func(self, # train_x): # '''Get the derivative of the SE kernel with respect to the input. 
@@ -1156,7 +1157,7 @@ def make_casadi_prediction_func(self, train_inputs, train_targets): # M_inv = ca.DM(M_inv) # assert M.shape[0] == train_x.shape[1], ValueError('Mismatch in input dimensions') # Nx = len(self.input_mask) # number of input dimension - # num_data = train_x.shape[0] + # num_data = train_x.shape[0] # z = ca.SX.sym('z', Nx) # query point # # compute 1st derivative of the kernel (8) # dkdx = ca.SX.zeros(Nx, num_data) @@ -1166,7 +1167,7 @@ def make_casadi_prediction_func(self, train_inputs, train_targets): # dkdx = M_inv**2 @ dkdx # # compute 2nd derivative of the kernel (9) # d2kdx2 = M_inv**2 * output_scale ** 2 - + # dkdx_func = ca.Function('dkdx', # [z], # [dkdx], @@ -1178,7 +1179,7 @@ def make_casadi_prediction_func(self, train_inputs, train_targets): # ['z'], # ['d2kdx2']) # return dkdx_func, d2kdx2_func - + def make_casadi_linearized_prediction_func(self, train_inputs, train_targets): '''Get the linearized prediction casadi function. See Berkenkamp and Schoellig, 2015, eq. 
(8) (9) for the derivative @@ -1200,42 +1201,42 @@ def make_casadi_linearized_prediction_func(self, train_inputs, train_targets): M_inv = ca.DM(M_inv) assert M.shape[0] == train_inputs.shape[1], ValueError('Mismatch in input dimensions') num_data = train_inputs.shape[0] - z = ca.SX.sym('z', len(self.input_mask)) # query point + z = ca.SX.sym('z', len(self.input_mask)) # query point # compute 1st derivative of the kernel (8) dkdx = ca.SX.zeros(len(self.input_mask), num_data) for i in range(num_data): dkdx[:, i] = (train_inputs[i] - z) * \ - covSEard(z, train_inputs[i].T, lengthscale.T, output_scale) + covSEard(z, train_inputs[i].T, lengthscale.T, output_scale) dkdx = M_inv**2 @ dkdx # compute 2nd derivative of the kernel (9) - d2kdx2 = M_inv**2 * output_scale ** 2 - + d2kdx2 = M_inv**2 * output_scale ** 2 + dkdx_func = ca.Function('dkdx', [z], [dkdx], ['z'], ['dkdx']) d2kdx2_func = ca.Function('d2kdx2', - [z], - [d2kdx2], - ['z'], - ['d2kdx2']) + [z], + [d2kdx2], + ['z'], + ['d2kdx2']) mean = dkdx_func(z) \ - @ self.model.K_plus_noise_inv.detach().numpy() @ train_targets + @ self.model.K_plus_noise_inv.detach().numpy() @ train_targets linearized_predict = ca.Function('linearized_predict', - [z], - [mean], - ['z'], - ['mean']) + [z], + [mean], + ['z'], + ['mean']) return linearized_predict # def linearized_prediction(self, - # x, + # x, # requires_grad=False, # return_pred=True # ): # ''' - # Linearized predictions: + # Linearized predictions: # See Berkenkamp and Schoellig, 2015, eq. (10) (11). 
# Args: diff --git a/safe_control_gym/controllers/mpc/gpmpc_acados.py b/safe_control_gym/controllers/mpc/gpmpc_acados.py index 761501635..7711400ac 100644 --- a/safe_control_gym/controllers/mpc/gpmpc_acados.py +++ b/safe_control_gym/controllers/mpc/gpmpc_acados.py @@ -1,6 +1,5 @@ - import time from copy import deepcopy from functools import partial @@ -10,22 +9,24 @@ import numpy as np import scipy import torch +from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver, AcadosSimSolver from sklearn.metrics import pairwise_distances_argmin_min from sklearn.model_selection import train_test_split from skopt.sampler import Lhs from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system +from safe_control_gym.controllers.mpc.gp_mpc import GPMPC from safe_control_gym.controllers.mpc.gp_utils import (GaussianProcessCollection, ZeroMeanIndependentGPModel, covSEard, kmeans_centriods) from safe_control_gym.controllers.mpc.linear_mpc import MPC, LinearMPC from safe_control_gym.controllers.mpc.mpc import MPC -from safe_control_gym.controllers.mpc.gp_mpc import GPMPC # from safe_control_gym.controllers.mpc.sqp_mpc import SQPMPC from safe_control_gym.envs.benchmark_env import Task -from acados_template import AcadosOcp, AcadosOcpSolver, AcadosSimSolver, AcadosModel + class GPMPC_ACADOS(GPMPC): '''Implements a GP-MPC controller with Acados optimization.''' + def __init__( self, env_func, @@ -46,8 +47,8 @@ def __init__( use_gpu: bool = False, gp_model_path: str = None, n_ind_points: int = 30, - inducing_point_selection_method = 'kmeans', - recalc_inducing_points_at_every_step = False, + inducing_point_selection_method='kmeans', + recalc_inducing_points_at_every_step=False, prob: float = 0.955, initial_rollout_std: float = 0.005, input_mask: list = None, @@ -64,7 +65,7 @@ def __init__( use_RTI: bool = False, **kwargs ): - + if prior_info is None or prior_info == {}: raise ValueError('GPMPC_ACADOS requires prior_prop to be defined. 
You may use the real mass properties and then use prior_param_coeff to modify them accordingly.') prior_info['prior_prop'].update((prop, val * prior_param_coeff) for prop, val in prior_info['prior_prop'].items()) @@ -96,38 +97,38 @@ def __init__( self.sparse_gp = sparse_gp # super().__init__() # TODO: check the inheritance of the class super().__init__( - env_func = env_func, - seed= seed, - horizon = horizon, - q_mpc = q_mpc, - r_mpc = r_mpc, - constraint_tol = constraint_tol, - additional_constraints = additional_constraints, - soft_constraints = soft_constraints, - warmstart = warmstart, - train_iterations = train_iterations, - test_data_ratio = test_data_ratio, - overwrite_saved_data = overwrite_saved_data, - optimization_iterations = optimization_iterations, - learning_rate = learning_rate, - normalize_training_data = normalize_training_data, - use_gpu = use_gpu, - gp_model_path = gp_model_path, - prob = prob, - initial_rollout_std = initial_rollout_std, - input_mask = input_mask, - target_mask = target_mask, - gp_approx = gp_approx, - sparse_gp = sparse_gp, - n_ind_points = n_ind_points, - inducing_point_selection_method = 'kmeans', - recalc_inducing_points_at_every_step = False, - online_learning = online_learning, - prior_info = prior_info, + env_func=env_func, + seed=seed, + horizon=horizon, + q_mpc=q_mpc, + r_mpc=r_mpc, + constraint_tol=constraint_tol, + additional_constraints=additional_constraints, + soft_constraints=soft_constraints, + warmstart=warmstart, + train_iterations=train_iterations, + test_data_ratio=test_data_ratio, + overwrite_saved_data=overwrite_saved_data, + optimization_iterations=optimization_iterations, + learning_rate=learning_rate, + normalize_training_data=normalize_training_data, + use_gpu=use_gpu, + gp_model_path=gp_model_path, + prob=prob, + initial_rollout_std=initial_rollout_std, + input_mask=input_mask, + target_mask=target_mask, + gp_approx=gp_approx, + sparse_gp=sparse_gp, + n_ind_points=n_ind_points, + 
inducing_point_selection_method='kmeans', + recalc_inducing_points_at_every_step=False, + online_learning=online_learning, + prior_info=prior_info, # inertial_prop: list = [1.0], - prior_param_coeff = prior_param_coeff, - terminate_run_on_done = terminate_run_on_done, - output_dir = output_dir, + prior_param_coeff=prior_param_coeff, + terminate_run_on_done=terminate_run_on_done, + output_dir=output_dir, **kwargs) # self.prior_ctrl = LinearMPC( # self.prior_env_func, @@ -153,7 +154,7 @@ def __init__( self.data_inputs = None self.data_targets = None self.prior_dynamics_func = self.prior_ctrl.linear_dynamics_func - # self.prior_dynamics_func = self.prior_ctrl.dynamics_func # nonlinear prior + # self.prior_dynamics_func = self.prior_ctrl.dynamics_func # nonlinear prior self.X_EQ = self.prior_ctrl.X_EQ self.U_EQ = self.prior_ctrl.U_EQ # GP and training parameters. @@ -197,11 +198,11 @@ def __init__( self.setup_acados_model() self.setup_acados_optimizer() self.acados_ocp_solver = AcadosOcpSolver(self.ocp) - + def setup_acados_model(self) -> AcadosModel: model_name = self.env.NAME - + acados_model = AcadosModel() acados_model.x = self.model.x_sym acados_model.u = self.model.u_sym @@ -211,18 +212,18 @@ def setup_acados_model(self) -> AcadosModel: B_lin = self.discrete_dfdu if self.gaussian_process is None: - f_disc = self.prior_dynamics_func(x0=acados_model.x- self.X_EQ, - p=acados_model.u- self.U_EQ)['xf'] \ + f_disc = self.prior_dynamics_func(x0=acados_model.x - self.X_EQ, + p=acados_model.u - self.U_EQ)['xf'] \ + self.prior_ctrl.X_EQ[:, None] else: - z = cs.vertcat(acados_model.x, acados_model.u) # GP prediction point + z = cs.vertcat(acados_model.x, acados_model.u) # GP prediction point z = z[self.input_mask] if self.sparse_gp: raise NotImplementedError('Sparse GP not implemented for acados.') else: - f_disc = self.prior_dynamics_func(x0=acados_model.x- self.X_EQ, - p=acados_model.u- self.U_EQ)['xf'] \ - + self.prior_ctrl.X_EQ[:, None] + f_disc = 
self.prior_dynamics_func(x0=acados_model.x - self.X_EQ, + p=acados_model.u - self.U_EQ)['xf'] \ + + self.prior_ctrl.X_EQ[:, None] + self.Bd @ self.gaussian_process.casadi_predict(z=z)['mean'] acados_model.disc_dyn_expr = f_disc @@ -246,9 +247,9 @@ def setup_acados_optimizer(self): ocp.model = self.acados_model # set dimensions - ocp.dims.N = self.T # prediction horizon + ocp.dims.N = self.T # prediction horizon - # set cost + # set cost ocp.cost.cost_type = 'LINEAR_LS' ocp.cost.cost_type_e = 'LINEAR_LS' ocp.cost.W = scipy.linalg.block_diag(self.Q, self.R) @@ -256,7 +257,7 @@ def setup_acados_optimizer(self): ocp.cost.Vx = np.zeros((ny, nx)) ocp.cost.Vx[:nx, :nx] = np.eye(nx) ocp.cost.Vu = np.zeros((ny, nu)) - ocp.cost.Vu[nx:(nx+nu), :nu] = np.eye(nu) + ocp.cost.Vu[nx:(nx + nu), :nu] = np.eye(nu) ocp.cost.Vx_e = np.eye(nx) # placeholder y_ref and y_ref_e (will be set in select_action) ocp.cost.yref = np.zeros((ny, )) @@ -292,22 +293,21 @@ def setup_acados_optimizer(self): input_constraint_expr_list.append(input_constraint(ocp.model.u)) # chance input constraint tightening input_tighten_list.append(cs.MX.sym(f'input_tighten_{ic_i}', input_constraint(ocp.model.u).shape[0], 1)) - + h_expr_list = state_constraint_expr_list + input_constraint_expr_list h_expr = cs.vertcat(*h_expr_list) h0_expr = cs.vertcat(*h_expr_list) - he_expr = cs.vertcat(*state_constraint_expr_list) # terminal constraints are only state constraints + he_expr = cs.vertcat(*state_constraint_expr_list) # terminal constraints are only state constraints # pass the constraints to the ocp object ocp = self.processing_acados_constraints_expression(ocp, h0_expr, h_expr, he_expr, state_tighten_list, input_tighten_list) # pass the tightening variables to the ocp object as parameters tighten_var = cs.vertcat(*state_tighten_list, *input_tighten_list) - ocp.model.p = tighten_var - ocp.parameter_values = np.zeros((tighten_var.shape[0], )) # dummy values + ocp.model.p = tighten_var + ocp.parameter_values = 
np.zeros((tighten_var.shape[0], )) # dummy values # slack costs for nonlinear constraints if self.gp_soft_constraints: raise NotImplementedError('Soft constraints not implemented for acados.') - # placeholder initial state constraint x_init = np.zeros((nx)) @@ -324,10 +324,10 @@ def setup_acados_optimizer(self): self.ocp = ocp - def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_expr, he_expr, \ + def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_expr, he_expr, state_tighten_list, input_tighten_list) -> AcadosOcp: '''Preprocess the constraints to be compatible with acados. - Args: + Args: h0_expr (casadi expression): initial state constraints h_expr (casadi expression): state and input constraints he_expr (casadi expression): terminal state constraints @@ -335,34 +335,34 @@ def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_ex input_tighten_list (list): list of casadi SX variables for input constraint tightening Returns: ocp (AcadosOcp): acados ocp object with constraints set - + Note: all constraints in safe-control-gym are defined as g(x, u) <= constraint_tol However, acados requires the constraints to be defined as lb <= g(x, u) <= ub Thus, a large negative number (-1e8) is used as the lower bound. 
- See: https://github.com/acados/acados/issues/650 + See: https://github.com/acados/acados/issues/650 An alternative way to set the constraints is to use bounded constraints of acados: # bounded input constraints idxbu = np.where(np.sum(self.env.constraints.input_constraints[0].constraint_filter, axis=0) != 0)[0] ocp.constraints.Jbu = np.eye(nu) ocp.constraints.lbu = self.env.constraints.input_constraints[0].lower_bounds - ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds + ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds ocp.constraints.idxbu = idxbu # active constraints dimension ''' - # NOTE: only the upper bound is tightened due to constraint are defined in the + # NOTE: only the upper bound is tightened due to constraint are defined in the # form of g(x, u) <= constraint_tol in safe-control-gym # lambda functions to set the upper and lower bounds of the chance constraints - constraint_ub_chance = lambda constraint: -self.constraint_tol * np.ones(constraint.shape) - constraint_lb_chance = lambda constraint: -1e8 * np.ones(constraint.shape) + def constraint_ub_chance(constraint): return -self.constraint_tol * np.ones(constraint.shape) + def constraint_lb_chance(constraint): return -1e8 * np.ones(constraint.shape) state_tighten_var = cs.vertcat(*state_tighten_list) input_tighten_var = cs.vertcat(*input_tighten_list) - - ub = {'h': constraint_ub_chance(h_expr - cs.vertcat(state_tighten_var, input_tighten_var)), \ - 'h0': constraint_ub_chance(h0_expr - cs.vertcat(state_tighten_var, input_tighten_var)),\ + + ub = {'h': constraint_ub_chance(h_expr - cs.vertcat(state_tighten_var, input_tighten_var)), + 'h0': constraint_ub_chance(h0_expr - cs.vertcat(state_tighten_var, input_tighten_var)), 'he': constraint_ub_chance(he_expr - state_tighten_var)} - lb = {'h': constraint_lb_chance(h_expr), 'h0': constraint_lb_chance(h0_expr),\ + lb = {'h': constraint_lb_chance(h_expr), 'h0': constraint_lb_chance(h0_expr), 'he': 
constraint_lb_chance(he_expr)} # make sure all the ub and lb are 1D casaadi SX variables @@ -381,7 +381,7 @@ def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_ex ocp.model.con_h_expr = h_expr - cs.vertcat(state_tighten_var, input_tighten_var) ocp.model.con_h_expr_e = he_expr - state_tighten_var ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \ - h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] + h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] # assign constraints upper and lower bounds ocp.constraints.uh_0 = ub['h0'] ocp.constraints.lh_0 = lb['h0'] @@ -404,15 +404,15 @@ def select_action(self, obs, info=None): self.last_action = action print('gpmpc acados action selection time:', time_after - time_before) return action - + def select_action_with_gp(self, obs): nx, nu = self.model.nx, self.model.nu ny = nx + nu ny_e = nx # set initial condition (0-th state) - self.acados_ocp_solver.set(0, "lbx", obs) - self.acados_ocp_solver.set(0, "ubx", obs) + self.acados_ocp_solver.set(0, 'lbx', obs) + self.acados_ocp_solver.set(0, 'ubx', obs) if self.warmstart: if self.x_guess is None or self.u_guess is None: if self.compute_ipopt_initial_guess: @@ -426,36 +426,36 @@ def select_action_with_gp(self, obs): self.u_guess = np.zeros((nu, self.T)) for idx in range(self.T + 1): init_x = self.x_guess[:, idx] - self.acados_ocp_solver.set(idx, "x", init_x) + self.acados_ocp_solver.set(idx, 'x', init_x) for idx in range(self.T): if nu == 1: init_u = np.array([self.u_guess[idx]]) else: init_u = self.u_guess[:, idx] - self.acados_ocp_solver.set(idx, "u", init_u) + self.acados_ocp_solver.set(idx, 'u', init_u) else: for idx in range(self.T + 1): - self.acados_ocp_solver.set(idx, "x", obs) + self.acados_ocp_solver.set(idx, 'x', obs) for idx in range(self.T): - self.acados_ocp_solver.set(idx, "u", np.zeros((nu,))) + self.acados_ocp_solver.set(idx, 'u', np.zeros((nu,))) # Set the probabilistic state and input constraint set limits. 
- # Tightening at the first step is possible if self.compute_initial_guess is used + # Tightening at the first step is possible if self.compute_initial_guess is used time_before = time.time() state_constraint_set_prev, input_constraint_set_prev = self.precompute_probabilistic_limits() time_after = time.time() print('precompute_probabilistic_limits time:', time_after - time_before) - + # for si in range(len(self.constraints.state_constraints)): # tighten initial and path constraints for idx in range(self.T): state_constraint_set = state_constraint_set_prev[0][:, idx] input_constraint_set = input_constraint_set_prev[0][:, idx] tighten_value = np.concatenate((state_constraint_set, input_constraint_set)) - self.acados_ocp_solver.set(idx, "p", tighten_value) + self.acados_ocp_solver.set(idx, 'p', tighten_value) # set terminal state constraints tighten_value = np.concatenate((state_constraint_set_prev[0][:, self.T], np.zeros((2 * nu,)))) - self.acados_ocp_solver.set(self.T, "p", tighten_value) + self.acados_ocp_solver.set(self.T, 'p', tighten_value) # print('tighten_value:', tighten_value) # print('state_constraint_set_prev[0][:, self.T]:', state_constraint_set_prev[0][:, self.T]) @@ -465,9 +465,9 @@ def select_action_with_gp(self, obs): self.traj_step += 1 for idx in range(self.T): y_ref = np.concatenate((goal_states[:, idx], np.zeros((nu,)))) - self.acados_ocp_solver.set(idx, "yref", y_ref) - y_ref_e = goal_states[:, -1] - self.acados_ocp_solver.set(self.T, "yref", y_ref_e) + self.acados_ocp_solver.set(idx, 'yref', y_ref) + y_ref_e = goal_states[:, -1] + self.acados_ocp_solver.set(self.T, 'yref', y_ref_e) # solve the optimization problem # try: @@ -477,17 +477,17 @@ def select_action_with_gp(self, obs): status = self.acados_ocp_solver.solve() # feedback phase - self.acados_ocp_solver.options_set('rti_phase', 2) + self.acados_ocp_solver.options_set('rti_phase', 2) status = self.acados_ocp_solver.solve() - + if status not in [0, 2]: 
self.acados_ocp_solver.print_statistics() raise Exception(f'acados returned status {status}. Exiting.') # print(f"acados returned status {status}. ") if status == 2: - print(f"acados returned status {status}. ") - - action = self.acados_ocp_solver.get(0, "u") + print(f'acados returned status {status}. ') + + action = self.acados_ocp_solver.get(0, 'u') else: status = self.acados_ocp_solver.solve() @@ -496,15 +496,15 @@ def select_action_with_gp(self, obs): raise Exception(f'acados returned status {status}. Exiting.') # print(f"acados returned status {status}. ") if status == 2: - print(f"acados returned status {status}. ") - action = self.acados_ocp_solver.get(0, "u") + print(f'acados returned status {status}. ') + action = self.acados_ocp_solver.get(0, 'u') # except Exception as e: # print(f"========== acados solver failed with error: {e} =============") # print('using prior controller') # action = self.prior_ctrl.select_action(obs) return action - + def reset(self): '''Reset the controller before running.''' # Setup reference input. @@ -516,12 +516,12 @@ def reset(self): self.traj = self.env.X_GOAL.T self.traj_step = 0 # Dynamics model. 
- + if self.gaussian_process is not None: self.set_gp_dynamics_func(self.n_ind_points) self.setup_acados_model() self.setup_acados_optimizer() - # n_ind_points = self.train_data['train_targets'].shape[0] + # n_ind_points = self.train_data['train_targets'].shape[0] print('=========== Resetting prior controller ===========') self.prior_ctrl.reset() self.setup_results_dict() @@ -531,4 +531,3 @@ def reset(self): self.x_guess = None self.u_guess = None - diff --git a/safe_control_gym/controllers/mpc/mpc.py b/safe_control_gym/controllers/mpc/mpc.py index bffb64af3..5de45cf78 100644 --- a/safe_control_gym/controllers/mpc/mpc.py +++ b/safe_control_gym/controllers/mpc/mpc.py @@ -82,7 +82,7 @@ def __init__( # print(self.env.__dir__()) # print('self.env.X_GOAL', self.env.X_GOAL) - # NOTE: The naming X_EQ and U_EQ can be confusing + # NOTE: The naming X_EQ and U_EQ can be confusing self.X_EQ = self.env.X_GOAL self.U_EQ = self.env.U_GOAL self.init_solver = 'ipopt' @@ -163,7 +163,7 @@ def compute_lqr_initial_guess(self, init_state, goal_states, x_lin, u_lin): dfdx = dfdxdfdu['dfdx'].toarray() dfdu = dfdxdfdu['dfdu'].toarray() lqr_gain, _, _ = compute_discrete_lqr_gain_from_cont_linear_system(dfdx, dfdu, self.Q, self.R, self.dt) - + # initialize the guess solutions x_guess = np.zeros((self.model.nx, self.T + 1)) u_guess = np.zeros((self.model.nu, self.T)) @@ -175,20 +175,20 @@ def compute_lqr_initial_guess(self, init_state, goal_states, x_lin, u_lin): x_guess[:, i + 1, None] = self.dynamics_func(x0=x_guess[:, i], p=u)['xf'].toarray() return x_guess, u_guess - + def compute_initial_guess(self, init_state, goal_states): time_before = time.time() '''Use IPOPT to get an initial guess of the ''' self.setup_optimizer(solver=self.init_solver) opti_dict = self.opti_dict opti = opti_dict['opti'] - x_var = opti_dict['x_var'] # optimization variables - u_var = opti_dict['u_var'] # optimization variables - x_init = opti_dict['x_init'] # initial state - x_ref = opti_dict['x_ref'] # reference 
state/trajectory + x_var = opti_dict['x_var'] # optimization variables + u_var = opti_dict['u_var'] # optimization variables + x_init = opti_dict['x_init'] # initial state + x_ref = opti_dict['x_ref'] # reference state/trajectory # Assign the initial state. - opti.set_value(x_init, init_state) # initial state should have dim (nx,) + opti.set_value(x_init, init_state) # initial state should have dim (nx,) # Assign reference trajectory within horizon. goal_states = self.get_references() opti.set_value(x_ref, goal_states) @@ -207,7 +207,7 @@ def compute_initial_guess(self, init_state, goal_states): # set the solver back self.setup_optimizer(solver=self.solver) - + time_after = time.time() print('MPC _compute_initial_guess time: ', time_after - time_before) @@ -295,7 +295,7 @@ def setup_optimizer(self, solver='qrsqp'): # print(opti) # exit() - + self.opti_dict = { 'opti': opti, 'x_var': x_var, @@ -321,10 +321,10 @@ def select_action(self, time_before = time.time() opti_dict = self.opti_dict opti = opti_dict['opti'] - x_var = opti_dict['x_var'] # optimization variables - u_var = opti_dict['u_var'] # optimization variables - x_init = opti_dict['x_init'] # initial state - x_ref = opti_dict['x_ref'] # reference state/trajectory + x_var = opti_dict['x_var'] # optimization variables + u_var = opti_dict['u_var'] # optimization variables + x_init = opti_dict['x_init'] # initial state + x_ref = opti_dict['x_ref'] # reference state/trajectory # Assign the initial state. opti.set_value(x_init, obs) @@ -335,13 +335,13 @@ def select_action(self, self.traj_step += 1 if self.warmstart and self.x_prev is None and self.u_prev is None: - # x_guess, u_guess = self.compute_lqr_initial_guess(obs, goal_states, self.X_EQ, self.U_EQ) - print(f'computing initial guess with {self.init_solver}') - x_guess, u_guess = self.compute_initial_guess(obs, goal_states) - opti.set_initial(x_var, x_guess) - opti.set_initial(u_var, u_guess) # Initial guess for optimization problem. 
+ # x_guess, u_guess = self.compute_lqr_initial_guess(obs, goal_states, self.X_EQ, self.U_EQ) + print(f'computing initial guess with {self.init_solver}') + x_guess, u_guess = self.compute_initial_guess(obs, goal_states) + opti.set_initial(x_var, x_guess) + opti.set_initial(u_var, u_guess) # Initial guess for optimization problem. elif self.warmstart and self.x_prev is not None and self.u_prev is not None: - # if self.warmstart and self.x_prev is not None and self.u_prev is not None: + # if self.warmstart and self.x_prev is not None and self.u_prev is not None: # shift previous solutions by 1 step x_guess = deepcopy(self.x_prev) u_guess = deepcopy(self.u_prev) @@ -375,7 +375,7 @@ def select_action(self, u_val = opti.debug.value(u_var) x_val = opti.debug.value(x_var) skip = 8 - print('x_val: ', x_val[:,::skip]) + print('x_val: ', x_val[:, ::skip]) print('u_val: ', u_val[::skip]) self.x_prev = x_val self.u_prev = u_val diff --git a/safe_control_gym/controllers/mpc/mpc_acados.py b/safe_control_gym/controllers/mpc/mpc_acados.py index 8f9a70f71..936cb1e2f 100644 --- a/safe_control_gym/controllers/mpc/mpc_acados.py +++ b/safe_control_gym/controllers/mpc/mpc_acados.py @@ -5,6 +5,7 @@ import casadi as cs import numpy as np import scipy +from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver, AcadosSimSolver from safe_control_gym.controllers.base_controller import BaseController from safe_control_gym.controllers.mpc.mpc_utils import (compute_discrete_lqr_gain_from_cont_linear_system, @@ -13,7 +14,6 @@ from safe_control_gym.envs.benchmark_env import Task from safe_control_gym.envs.constraints import GENERAL_CONSTRAINTS, create_constraint_list -from acados_template import AcadosOcp, AcadosOcpSolver, AcadosSimSolver, AcadosModel class MPC_ACADOS(BaseController): '''MPC with full nonlinear model.''' @@ -85,10 +85,10 @@ def __init__( # print(self.env.__dir__()) # print('self.env.X_GOAL', self.env.X_GOAL) - # NOTE: The naming X_EQ and U_EQ can be confusing + # NOTE: 
The naming X_EQ and U_EQ can be confusing self.X_EQ = self.env.X_GOAL self.U_EQ = self.env.U_GOAL - + # warm-starting self.init_solver = 'ipopt' self.x_guess = None @@ -99,9 +99,8 @@ def __init__( self.set_dynamics_func() self.setup_acados_model() self.setup_acados_optimizer() - self.acados_ocp_solver = AcadosOcpSolver(self.ocp) # , \ - # json_file=f'acados_{self.ocp.model.name}.json') - + self.acados_ocp_solver = AcadosOcpSolver(self.ocp) # , \ + # json_file=f'acados_{self.ocp.model.name}.json') def add_constraints(self, constraints @@ -175,7 +174,7 @@ def set_dynamics_func(self): def setup_acados_model(self) -> AcadosModel: model_name = self.env.NAME - + acados_model = AcadosModel() acados_model.x = self.model.x_sym # acados_model.xdot = self.model.x_dot_acados # must be symbolic @@ -184,10 +183,10 @@ def setup_acados_model(self) -> AcadosModel: # set up rk4 (acados need symbolic expression of dynamics, not function) k1 = self.model.fc_func(acados_model.x, acados_model.u) - k2 = self.model.fc_func(acados_model.x + self.dt/2 * k1, acados_model.u) - k3 = self.model.fc_func(acados_model.x + self.dt/2 * k2, acados_model.u) + k2 = self.model.fc_func(acados_model.x + self.dt / 2 * k1, acados_model.u) + k3 = self.model.fc_func(acados_model.x + self.dt / 2 * k2, acados_model.u) k4 = self.model.fc_func(acados_model.x + self.dt * k3, acados_model.u) - f_disc = acados_model.x + self.dt/6 * (k1 + 2*k2 + 2*k3 + k4) + f_disc = acados_model.x + self.dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4) acados_model.disc_dyn_expr = f_disc # f_expl = self.model.x_dot @@ -201,14 +200,14 @@ def setup_acados_model(self) -> AcadosModel: acados_model.t_label = 'time' self.acados_model = acados_model - + # def compute_lqr_initial_guess(self, init_state, goal_states, x_lin, u_lin): # '''Use LQR to get an initial guess of the ''' # dfdxdfdu = self.model.df_func(x=x_lin, u=u_lin) # dfdx = dfdxdfdu['dfdx'].toarray() # dfdu = dfdxdfdu['dfdu'].toarray() # lqr_gain, _, _ = 
compute_discrete_lqr_gain_from_cont_linear_system(dfdx, dfdu, self.Q, self.R, self.dt) - + # # initialize the guess solutions # x_guess = np.zeros((self.model.nx, self.T + 1)) # u_guess = np.zeros((self.model.nu, self.T)) @@ -219,26 +218,26 @@ def setup_acados_model(self) -> AcadosModel: # u_guess[:, i] = u # x_guess[:, i + 1, None] = self.dynamics_func(x0=x_guess[:, i], p=u)['xf'].toarray() # return x_guess, u_guess - + def compute_initial_guess(self, init_state, goal_states): time_before = time.time() '''Use IPOPT to get an initial guess of the ''' self.setup_optimizer(solver=self.init_solver) opti_dict = self.opti_dict opti = opti_dict['opti'] - x_var = opti_dict['x_var'] # optimization variables - u_var = opti_dict['u_var'] # optimization variables - x_init = opti_dict['x_init'] # initial state - x_ref = opti_dict['x_ref'] # reference state/trajectory + x_var = opti_dict['x_var'] # optimization variables + u_var = opti_dict['u_var'] # optimization variables + x_init = opti_dict['x_init'] # initial state + x_ref = opti_dict['x_ref'] # reference state/trajectory # Assign the initial state. - opti.set_value(x_init, init_state) # initial state should have dim (nx,) + opti.set_value(x_init, init_state) # initial state should have dim (nx,) # Assign reference trajectory within horizon. goal_states = self.get_references() opti.set_value(x_ref, goal_states) # if self.mode == 'tracking': # self.traj_step += 1 - # Solve the optimization problem. + # Solve the optimization problem. 
try: sol = opti.solve() x_val, u_val = sol.value(x_var), sol.value(u_var) @@ -263,7 +262,7 @@ def setup_acados_optimizer(self): ocp.model = self.acados_model # set dimensions - ocp.dims.N = self.T # prediction horizon + ocp.dims.N = self.T # prediction horizon # set cost (NOTE: safe-control-gym uses quadratic cost) ocp.cost.cost_type = 'LINEAR_LS' @@ -273,7 +272,7 @@ def setup_acados_optimizer(self): ocp.cost.Vx = np.zeros((ny, nx)) ocp.cost.Vx[:nx, :nx] = np.eye(nx) ocp.cost.Vu = np.zeros((ny, nu)) - ocp.cost.Vu[nx:(nx+nu), :nu] = np.eye(nu) + ocp.cost.Vu[nx:(nx + nu), :nu] = np.eye(nu) ocp.cost.Vx_e = np.eye(nx) # placeholder y_ref and y_ref_e (will be set in select_action) ocp.cost.yref = np.zeros((ny, )) @@ -284,7 +283,7 @@ def setup_acados_optimizer(self): # idxbu = np.where(np.sum(self.env.constraints.input_constraints[0].constraint_filter, axis=0) != 0)[0] # ocp.constraints.Jbu = np.eye(nu) # ocp.constraints.lbu = self.env.constraints.input_constraints[0].lower_bounds - # ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds + # ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds # ocp.constraints.idxbu = idxbu # active constraints dimension # # bounded state constraints # idxbx = np.where(np.sum(self.env.constraints.state_constraints[0].constraint_filter, axis=0) != 0)[0] @@ -309,7 +308,7 @@ def setup_acados_optimizer(self): h_expr_list = state_constraint_expr_list + input_constraint_expr_list h_expr = cs.vertcat(*h_expr_list) h0_expr = cs.vertcat(*h_expr_list) - he_expr = cs.vertcat(*state_constraint_expr_list) # terminal constraints are only state constraints + he_expr = cs.vertcat(*state_constraint_expr_list) # terminal constraints are only state constraints # pass the constraints to the ocp object ocp = self.processing_acados_constraints_expression(ocp, h0_expr, h_expr, he_expr) @@ -324,13 +323,13 @@ def setup_acados_optimizer(self): L1_pen = 1e4 ocp.cost.Zu = L2_pen * np.ones(h_expr.shape[0]) ocp.cost.Zl 
= L2_pen * np.ones(h_expr.shape[0]) - ocp.cost.zl = L1_pen * np.ones(h_expr.shape[0]) + ocp.cost.zl = L1_pen * np.ones(h_expr.shape[0]) ocp.cost.zu = L1_pen * np.ones(h_expr.shape[0]) ocp.cost.Zl_e = L2_pen * np.ones(he_expr.shape[0]) ocp.cost.Zu_e = L2_pen * np.ones(he_expr.shape[0]) ocp.cost.zl_e = L1_pen * np.ones(he_expr.shape[0]) ocp.cost.zu_e = L1_pen * np.ones(he_expr.shape[0]) - + # placeholder initial state constraint x_init = np.zeros((nx)) ocp.constraints.x0 = x_init @@ -348,34 +347,34 @@ def setup_acados_optimizer(self): def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_expr, he_expr) -> AcadosOcp: '''Preprocess the constraints to be compatible with acados. - Args: + Args: h0_expr (casadi expression): initial state constraints h_expr (casadi expression): state and input constraints he_expr (casadi expression): terminal state constraints Returns: ocp (AcadosOcp): acados ocp object with constraints set. - + Note: all constraints in safe-control-gym are defined as g(x, u) <= constraint_tol However, acados requires the constraints to be defined as lb <= g(x, u) <= ub Thus, a large negative number (-1e8) is used as the lower bound. 
- See: https://github.com/acados/acados/issues/650 + See: https://github.com/acados/acados/issues/650 An alternative way to set the constraints is to use bounded constraints of acados: # bounded input constraints idxbu = np.where(np.sum(self.env.constraints.input_constraints[0].constraint_filter, axis=0) != 0)[0] ocp.constraints.Jbu = np.eye(nu) ocp.constraints.lbu = self.env.constraints.input_constraints[0].lower_bounds - ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds + ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds ocp.constraints.idxbu = idxbu # active constraints dimension ''' # lambda functions to set the upper and lower bounds of the constraints - constraint_ub = lambda constraint: -self.constraint_tol * np.ones(constraint.shape) - constraint_lb = lambda constraint: -1e8 * np.ones(constraint.shape) + def constraint_ub(constraint): return -self.constraint_tol * np.ones(constraint.shape) + def constraint_lb(constraint): return -1e8 * np.ones(constraint.shape) ub = {'h': constraint_ub(h_expr), 'h0': constraint_ub(h0_expr), 'he': constraint_ub(he_expr)} lb = {'h': constraint_lb(h_expr), 'h0': constraint_lb(h0_expr), 'he': constraint_lb(he_expr)} - # make sure all the ub and lb are 1D numpy arrays + # make sure all the ub and lb are 1D numpy arrays # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche) for key in ub.keys(): ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key] @@ -388,9 +387,9 @@ def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_ex # pass the constraints to the ocp object ocp.model.con_h_expr_0, ocp.model.con_h_expr, ocp.model.con_h_expr_e = \ - h0_expr, h_expr, he_expr + h0_expr, h_expr, he_expr ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \ - h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] + h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] # assign constraints upper and lower 
bounds ocp.constraints.uh_0 = ub['h0'] ocp.constraints.lh_0 = lb['h0'] @@ -420,30 +419,30 @@ def select_action(self, ny = nx + nu ny_e = nx # set initial condition (0-th state) - self.acados_ocp_solver.set(0, "lbx", obs) - self.acados_ocp_solver.set(0, "ubx", obs) + self.acados_ocp_solver.set(0, 'lbx', obs) + self.acados_ocp_solver.set(0, 'ubx', obs) time_after_init = time.time() # warm-starting solver (otherwise, zeros by default) time_before_warmstart = time.time() if self.warmstart: - if self.x_guess is None or self.u_guess is None: - # compute initial guess with IPOPT - self.compute_initial_guess(obs, self.get_references()) + if self.x_guess is None or self.u_guess is None: + # compute initial guess with IPOPT + self.compute_initial_guess(obs, self.get_references()) for idx in range(self.T + 1): init_x = self.x_guess[:, idx] - self.acados_ocp_solver.set(idx, "x", init_x) + self.acados_ocp_solver.set(idx, 'x', init_x) for idx in range(self.T): if nu == 1: init_u = np.array([self.u_guess[idx]]) else: init_u = self.u_guess[:, idx] - self.acados_ocp_solver.set(idx, "u", init_u) + self.acados_ocp_solver.set(idx, 'u', init_u) else: for idx in range(self.T + 1): - self.acados_ocp_solver.set(idx, "x", obs) + self.acados_ocp_solver.set(idx, 'x', obs) for idx in range(self.T): - self.acados_ocp_solver.set(idx, "u", np.zeros((nu,))) + self.acados_ocp_solver.set(idx, 'u', np.zeros((nu,))) time_after_warmstart = time.time() # set reference for the control horizon @@ -452,17 +451,17 @@ def select_action(self, time_after_get_ref = time.time() if self.mode == 'tracking': self.traj_step += 1 - + y_ref = np.concatenate((goal_states[:, :-1], np.zeros((nu, self.T)))) time_before_for_loop = time.time() - for idx in range(self.T): - self.acados_ocp_solver.set(idx, "yref", y_ref[:, idx]) + for idx in range(self.T): + self.acados_ocp_solver.set(idx, 'yref', y_ref[:, idx]) # y_ref = np.concatenate((goal_states[:, idx], np.zeros((nu,)))) # self.acados_ocp_solver.set(idx, "yref", y_ref) 
time_after_for_loop = time.time() time_before_set_final_ref = time.time() - y_ref_e = goal_states[:, -1] - self.acados_ocp_solver.set(self.T, "yref", y_ref_e) + y_ref_e = goal_states[:, -1] + self.acados_ocp_solver.set(self.T, 'yref', y_ref_e) time_after_set_final_ref = time.time() # solve the optimization problem @@ -475,7 +474,7 @@ def select_action(self, # feedback phase time_before_feedback = time.time() - self.acados_ocp_solver.options_set('rti_phase', 2) + self.acados_ocp_solver.options_set('rti_phase', 2) status = self.acados_ocp_solver.solve() time_after_feedback = time.time() @@ -484,9 +483,9 @@ def select_action(self, raise Exception(f'acados returned status {status}. Exiting.') # print(f"acados returned status {status}. ") if status == 2: - print(f"acados returned status {status}. ") - - action = self.acados_ocp_solver.get(0, "u") + print(f'acados returned status {status}. ') + + action = self.acados_ocp_solver.get(0, 'u') elif not self.use_RTI: status = self.acados_ocp_solver.solve() @@ -495,17 +494,17 @@ def select_action(self, raise Exception(f'acados returned status {status}. Exiting.') # print(f"acados returned status {status}. ") if status == 2: - print(f"acados returned status {status}. ") - action = self.acados_ocp_solver.get(0, "u") + print(f'acados returned status {status}. 
') + action = self.acados_ocp_solver.get(0, 'u') # get the open-loop solution time_before_saving = time.time() self.x_prev = np.zeros((nx, self.T + 1)) self.u_prev = np.zeros((nu, self.T)) for i in range(self.T + 1): - self.x_prev[:, i] = self.acados_ocp_solver.get(i, "x") + self.x_prev[:, i] = self.acados_ocp_solver.get(i, 'x') for i in range(self.T): - self.u_prev[:, i] = self.acados_ocp_solver.get(i, "u") + self.u_prev[:, i] = self.acados_ocp_solver.get(i, 'u') if nu == 1: self.u_prev = self.u_prev.flatten() @@ -518,7 +517,6 @@ def select_action(self, self.prev_action = action time_after_saving = time.time() - time_after = time.time() print('Initialization time: ', time_after_init - time_before_init) print('Warm-starting time: ', time_after_warmstart - time_before_warmstart) diff --git a/safe_control_gym/controllers/mpc/sqp_gp_mpc.py b/safe_control_gym/controllers/mpc/sqp_gp_mpc.py index 646f3668f..e123b0854 100644 --- a/safe_control_gym/controllers/mpc/sqp_gp_mpc.py +++ b/safe_control_gym/controllers/mpc/sqp_gp_mpc.py @@ -1,6 +1,5 @@ - import time from copy import deepcopy from functools import partial @@ -15,16 +14,18 @@ from skopt.sampler import Lhs from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system +from safe_control_gym.controllers.mpc.gp_mpc import GPMPC from safe_control_gym.controllers.mpc.gp_utils import (GaussianProcessCollection, ZeroMeanIndependentGPModel, covSEard, kmeans_centriods) from safe_control_gym.controllers.mpc.linear_mpc import MPC, LinearMPC from safe_control_gym.controllers.mpc.mpc import MPC -from safe_control_gym.controllers.mpc.gp_mpc import GPMPC from safe_control_gym.controllers.mpc.sqp_mpc import SQPMPC from safe_control_gym.envs.benchmark_env import Task + class SQPGPMPC(GPMPC): '''Implements a GP-MPC controller with SQP optimization.''' + def __init__( self, env_func, @@ -57,7 +58,7 @@ def __init__( output_dir: str = 'results/temp', **kwargs ): - + if prior_info is None or prior_info == {}: raise 
ValueError('SQPGPMPC requires prior_prop to be defined. You may use the real mass properties and then use prior_param_coeff to modify them accordingly.') prior_info['prior_prop'].update((prop, val * prior_param_coeff) for prop, val in prior_info['prior_prop'].items()) @@ -103,51 +104,51 @@ def __init__( # self.prior_ctrl.reset() # super().__init__() # TODO: check the inheritance of the class super().__init__( - env_func = env_func, - seed= seed, - horizon = horizon, - q_mpc = q_mpc, - r_mpc = r_mpc, - constraint_tol = constraint_tol, - additional_constraints = additional_constraints, - soft_constraints = soft_constraints, - warmstart = warmstart, - train_iterations = train_iterations, - test_data_ratio = test_data_ratio, - overwrite_saved_data = overwrite_saved_data, - optimization_iterations = optimization_iterations, - learning_rate = learning_rate, - normalize_training_data = normalize_training_data, - use_gpu = use_gpu, - gp_model_path = gp_model_path, - prob = prob, - initial_rollout_std = initial_rollout_std, - input_mask = input_mask, - target_mask = target_mask, - gp_approx = gp_approx, - sparse_gp = False, - n_ind_points = 50, - inducing_point_selection_method = 'kmeans', - recalc_inducing_points_at_every_step = False, - online_learning = online_learning, - prior_info = prior_info, + env_func=env_func, + seed=seed, + horizon=horizon, + q_mpc=q_mpc, + r_mpc=r_mpc, + constraint_tol=constraint_tol, + additional_constraints=additional_constraints, + soft_constraints=soft_constraints, + warmstart=warmstart, + train_iterations=train_iterations, + test_data_ratio=test_data_ratio, + overwrite_saved_data=overwrite_saved_data, + optimization_iterations=optimization_iterations, + learning_rate=learning_rate, + normalize_training_data=normalize_training_data, + use_gpu=use_gpu, + gp_model_path=gp_model_path, + prob=prob, + initial_rollout_std=initial_rollout_std, + input_mask=input_mask, + target_mask=target_mask, + gp_approx=gp_approx, + sparse_gp=False, + 
n_ind_points=50, + inducing_point_selection_method='kmeans', + recalc_inducing_points_at_every_step=False, + online_learning=online_learning, + prior_info=prior_info, # inertial_prop: list = [1.0], - prior_param_coeff = prior_param_coeff, - terminate_run_on_done = terminate_run_on_done, - output_dir = output_dir, + prior_param_coeff=prior_param_coeff, + terminate_run_on_done=terminate_run_on_done, + output_dir=output_dir, **kwargs) self.prior_ctrl = SQPMPC( - env_func = self.prior_env_func, - seed= seed, - horizon = horizon, - q_mpc = q_mpc, - r_mpc = r_mpc, - warmstart= warmstart, - soft_constraints= self.soft_constraints_params['prior_soft_constraints'], - terminate_run_on_done= terminate_run_on_done, - prior_info= prior_info, - output_dir= output_dir, - additional_constraints= additional_constraints) + env_func=self.prior_env_func, + seed=seed, + horizon=horizon, + q_mpc=q_mpc, + r_mpc=r_mpc, + warmstart=warmstart, + soft_constraints=self.soft_constraints_params['prior_soft_constraints'], + terminate_run_on_done=terminate_run_on_done, + prior_info=prior_info, + output_dir=output_dir, + additional_constraints=additional_constraints) # self.prior_ctrl = LinearMPC( # self.prior_env_func, # horizon=horizon, @@ -172,7 +173,7 @@ def __init__( self.data_inputs = None self.data_targets = None # self.prior_dynamics_func = self.prior_ctrl.linear_dynamics_func - self.prior_dynamics_func = self.prior_ctrl.dynamics_func # nonlinear prior + self.prior_dynamics_func = self.prior_ctrl.dynamics_func # nonlinear prior self.X_EQ = self.prior_ctrl.X_EQ self.U_EQ = self.prior_ctrl.U_EQ # GP and training parameters. @@ -213,7 +214,7 @@ def __init__( self.x_prev = None self.u_prev = None # exit() - + def set_lin_gp_dynamics_func(self): '''Updates symbolic dynamics with actual control frequency.''' # Original version, used in shooting. 
@@ -222,28 +223,28 @@ def set_lin_gp_dynamics_func(self): x_guess = cs.MX.sym('x_guess', self.model.nx, 1) u_guess = cs.MX.sym('u_guess', self.model.nu, 1) dfdxdfdu = self.model.df_func(x=x_guess, u=u_guess) - dfdx = dfdxdfdu['dfdx']#.toarray() - dfdu = dfdxdfdu['dfdu']#.toarray() - z = cs.MX.sym('z', self.model.nx + self.model.nu, 1) # query point (the linearization point) + dfdx = dfdxdfdu['dfdx'] # .toarray() + dfdu = dfdxdfdu['dfdu'] # .toarray() + z = cs.MX.sym('z', self.model.nx + self.model.nu, 1) # query point (the linearization point) Ad = cs.DM_eye(self.model.nx) + dfdx * self.dt Bd = dfdu * self.dt A_gp = self.gaussian_process.casadi_linearized_predict(z=z)['A'] B_gp = self.gaussian_process.casadi_linearized_predict(z=z)['B'] assert A_gp.shape == (self.model.nx, self.model.nx) assert B_gp.shape == (self.model.nx, self.model.nu) - A = Ad + A_gp # TODO: check why Bd is used here correctly + A = Ad + A_gp # TODO: check why Bd is used here correctly B = Bd + B_gp x_dot_lin = A @ delta_x + B @ delta_u - self.linear_gp_dynamics_func = cs.Function('linear_dynamics_func', - [delta_x, delta_u, x_guess, u_guess, z], - [x_dot_lin, A, B], - ['x0', 'p', 'x_guess', 'u_guess', 'z'], - ['xf', 'A', 'B']) + self.linear_gp_dynamics_func = cs.Function('linear_dynamics_func', + [delta_x, delta_u, x_guess, u_guess, z], + [x_dot_lin, A, B], + ['x0', 'p', 'x_guess', 'u_guess', 'z'], + ['xf', 'A', 'B']) self.dfdx = A self.dfdu = B - + def setup_sqp_gp_optimizer(self): - print(f'Setting up SQP GP MPC optimizer.') + print(f'Setting up SQP GP MPC optimizer.') before_optimizer_setup = time.time() nx, nu = self.model.nx, self.model.nu T = self.T @@ -279,12 +280,10 @@ def setup_sqp_gp_optimizer(self): # for input_constraint in self.constraints.input_constraints: # input_constraint_set.append(opti.parameter(input_constraint.num_constraints, T)) - - # Sparse GP mean postfactor matrix. (not used here!) 
# TODO: check if this is needed mean_post_factor = opti.parameter(len(self.target_mask), self.train_data['train_targets'].shape[0]) - + # cost (cumulative) cost = 0 cost_func = self.model.loss @@ -309,14 +308,14 @@ def setup_sqp_gp_optimizer(self): # Constraints for i in range(self.T): # Dynamics constraints using the dynamics of the prior and the mean of the GP. - next_state = self.linear_gp_dynamics_func(x0=x_var[:, i], p=u_var[:, i], \ - x_guess=x_guess[:,i], u_guess=u_guess[:,i], \ - z=z[:, i])['xf'] + next_state = self.linear_gp_dynamics_func(x0=x_var[:, i], p=u_var[:, i], + x_guess=x_guess[:, i], u_guess=u_guess[:, i], + z=z[:, i])['xf'] opti.subject_to(x_var[:, i + 1] == next_state) # TODO: probablistic constraints tightening for sc_i, state_constraint in enumerate(self.state_constraints_sym): opti.subject_to(state_constraint(x_var[:, i] + x_guess[:, i]) <= -self.constraint_tol) - + for ic_i, input_constraint in enumerate(self.input_constraints_sym): opti.subject_to(input_constraint(u_var[:, i] + u_guess[:, i]) <= -self.constraint_tol) @@ -326,7 +325,7 @@ def setup_sqp_gp_optimizer(self): # initial condiiton constraints opti.subject_to(x_var[:, 0] + x_guess[:, 0] == x_init) opti.minimize(cost) - # create solver + # create solver opts = {'expand': True} opti.solver(self.qp_solver, opts) self.opti_dict = { @@ -354,7 +353,7 @@ def select_action(self, obs, info=None): self.u_guess = u_val + self.u_guess self.x_guess = x_val + self.x_guess if np.linalg.norm(u_val - self.u_prev) < self.action_convergence_tol\ - and np.linalg.norm(x_val - self.x_prev) < self.action_convergence_tol: + and np.linalg.norm(x_val - self.x_prev) < self.action_convergence_tol: break self.u_prev, self.x_prev = u_val, x_val print(f'Number of SQP iterations: {i}') @@ -369,7 +368,7 @@ def select_action(self, obs, info=None): self.last_obs = obs self.last_action = action return action - + def select_action_with_sqp_gp(self, obs): if self.x_guess is None or self.u_guess is None: 
self.compute_initial_guess(obs, self.get_references()) @@ -386,7 +385,7 @@ def select_action_with_sqp_gp(self, obs): # Assign the initial state. opti.set_value(x_init, obs) - # Assign reference trajectory within horizon. + # Assign reference trajectory within horizon. goal_states = self.get_references() opti.set_value(x_ref, goal_states) opti.set_value(x_guess, self.x_guess) @@ -445,7 +444,7 @@ def reset(self): self.traj = self.env.X_GOAL.T self.traj_step = 0 # Dynamics model. - + if self.gaussian_process is not None: self.set_lin_gp_dynamics_func() self.setup_sqp_gp_optimizer() @@ -475,7 +474,7 @@ def preprocess_training_data(self, np.array: inputs for GP training, (N, nx+nu). np.array: targets for GP training, (N, nx). ''' - print("=========== Preprocessing training data for SQP ===========") + print('=========== Preprocessing training data for SQP ===========') # Get the predicted dynamics. This is a linear prior, thus we need to account for the fact that # it is linearized about an eq using self.X_GOAL and self.U_GOAL. 
x_pred_seq = self.prior_dynamics_func(x0=x_seq.T, diff --git a/safe_control_gym/controllers/mpc/sqp_mpc.py b/safe_control_gym/controllers/mpc/sqp_mpc.py index b21621a0b..54e6ee807 100644 --- a/safe_control_gym/controllers/mpc/sqp_mpc.py +++ b/safe_control_gym/controllers/mpc/sqp_mpc.py @@ -6,15 +6,17 @@ import casadi as cs import numpy as np +from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system from safe_control_gym.controllers.mpc.mpc import MPC from safe_control_gym.controllers.mpc.mpc_utils import (compute_discrete_lqr_gain_from_cont_linear_system, compute_state_rmse, get_cost_weight_matrix, reset_constraints, rk_discrete) -from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system from safe_control_gym.envs.benchmark_env import Task from safe_control_gym.envs.constraints import GENERAL_CONSTRAINTS, create_constraint_list + # from safe_control_gym.controllers.mpc.sqp_mpc_utils import get_cost + class SQPMPC(MPC): '''Model Predictive Control using Sequential Quadratic Programming (SQP).''' @@ -80,11 +82,11 @@ def __init__( self.env) self.additional_constraints = additional_constraintsList.constraints self.constraints, self.state_constraints_sym, self.input_constraints_sym \ - = reset_constraints(self.env.constraints.constraints - + self.additional_constraints) + = reset_constraints(self.env.constraints.constraints + + self.additional_constraints) else: self.constraints, self.state_constraints_sym, self.input_constraints_sym \ - = reset_constraints(self.env.constraints.constraints) + = reset_constraints(self.env.constraints.constraints) self.additional_constraints = [] # Model parameters self.model = self.get_prior(self.env) @@ -100,7 +102,7 @@ def __init__( # self.X_EQ = self.env.X_GOAL # self.U_EQ = self.env.U_GOAL - self.init_step_solver = 'ipopt' # for nonlinear warmstart + self.init_step_solver = 'ipopt' # for nonlinear warmstart self.qp_solver = 'qrqp' self.max_qp_iter = 50 self.action_convergence_tol = 1e-3 @@ 
-115,8 +117,8 @@ def set_lin_dynamics_func(self, exact=True): x_guess = cs.MX.sym('x_guess', self.model.nx, 1) u_guess = cs.MX.sym('u_guess', self.model.nu, 1) dfdxdfdu = self.model.df_func(x=x_guess, u=u_guess) - dfdx = dfdxdfdu['dfdx']#.toarray() - dfdu = dfdxdfdu['dfdu']#.toarray() + dfdx = dfdxdfdu['dfdx'] # .toarray() + dfdu = dfdxdfdu['dfdu'] # .toarray() # if exact: # NOTE: exact is not implemented because cs.expm is not supported # # M = cs.SX.zeros(self.model.nx + self.model.nu, self.model.nx + self.model.nu) # # M[:self.model.nx, :self.model.nx] = dfdx @@ -126,7 +128,7 @@ def set_lin_dynamics_func(self, exact=True): # Md = cs.expm(M * self.dt) # Ad = Md[:self.model.nx, :self.model.nx] # Bd = Md[:self.model.nx, self.model.nx:] - # else: + # else: Ad = cs.DM_eye(self.model.nx) + dfdx * self.dt Bd = dfdu * self.dt @@ -136,7 +138,6 @@ def set_lin_dynamics_func(self, exact=True): [x_dot_lin, Ad, Bd], ['x0', 'p', 'x_guess', 'u_guess'], ['xf', 'Ad', 'Bd']) - def reset(self): '''Prepares for training or evaluation.''' @@ -156,7 +157,7 @@ def reset(self): # Previously solved states & inputs, useful for warm start. 
# nominal solution - self.x_prev = None + self.x_prev = None self.u_prev = None # # previous delta solution self.x_guess = None @@ -167,7 +168,7 @@ def reset(self): # self.setup_optimizer() # self.setup_sqp_optimizer() self.setup_results_dict() - + def compute_initial_guess(self, init_state, goal_states): print('=============Computing initial guess=============') time_before = time.time() @@ -175,12 +176,12 @@ def compute_initial_guess(self, init_state, goal_states): self.setup_optimizer(solver=self.init_step_solver) opti_dict = self.opti_dict opti = opti_dict['opti'] - x_var = opti_dict['x_var'] # optimization variables - u_var = opti_dict['u_var'] # optimization variables - x_init = opti_dict['x_init'] # initial state - x_ref = opti_dict['x_ref'] # reference state/trajectory + x_var = opti_dict['x_var'] # optimization variables + u_var = opti_dict['u_var'] # optimization variables + x_init = opti_dict['x_init'] # initial state + x_ref = opti_dict['x_ref'] # reference state/trajectory # Assign the initial state. - opti.set_value(x_init, init_state) # initial state should have dim (nx,) + opti.set_value(x_init, init_state) # initial state should have dim (nx,) # Assign reference trajectory within horizon. goal_states = self.get_references() opti.set_value(x_ref, goal_states) @@ -249,8 +250,8 @@ def setup_sqp_optimizer(self): R=self.R)['l'] for i in range(self.T): # Dynamics constraints. 
- next_state = self.linear_dynamics_func(x0=x_var[:, i], p=u_var[:, i], - x_guess=x_guess[:,i], u_guess=u_guess[:,i])['xf'] + next_state = self.linear_dynamics_func(x0=x_var[:, i], p=u_var[:, i], + x_guess=x_guess[:, i], u_guess=u_guess[:, i])['xf'] opti.subject_to(x_var[:, i + 1] == next_state) # State and input constraints soft_con_coeff = 10 @@ -282,7 +283,7 @@ def setup_sqp_optimizer(self): # initial condition constraints opti.subject_to(x_var[:, 0] + x_guess[:, 0] == x_init) opti.minimize(cost) - # create solver + # create solver opts = {'expand': True} # if platform == 'linux': # opts.update({'print_time': 1, 'print_header': 0}) @@ -306,10 +307,10 @@ def setup_sqp_optimizer(self): } after_optimizer_setup = time.time() print('MPC setup_sqp_optimizer time: ', after_optimizer_setup - before_optimizer_setup) - + def select_action(self, obs, info=None): before_select_action = time.time() - # use nonlinear solver to get an initial guess at initial step + # use nonlinear solver to get an initial guess at initial step if self.x_guess is None or self.u_guess is None: self.compute_initial_guess(obs, self.get_references()) @@ -343,9 +344,9 @@ def select_action(self, obs, info=None): return action def select_qp_action(self, - obs, - info=None - ): + obs, + info=None + ): '''Solve nonlinear mpc problem to get next action. 
Args: @@ -422,6 +423,3 @@ def select_qp_action(self, # action += self.u_guess[0] # self.prev_action = action # return action - - - \ No newline at end of file diff --git a/safe_control_gym/controllers/mpc/sqp_mpc_utils.py b/safe_control_gym/controllers/mpc/sqp_mpc_utils.py index 48e6c3224..ec1c42ea9 100644 --- a/safe_control_gym/controllers/mpc/sqp_mpc_utils.py +++ b/safe_control_gym/controllers/mpc/sqp_mpc_utils.py @@ -17,7 +17,7 @@ def get_cost(r, Q, n_lookahead): S (np.array): The cost matrix Q (np.array): The state cost matrix R (np.array): The actuation cost matrix - + ''' I_r = np.eye(r.shape[0]) @@ -33,6 +33,5 @@ def get_cost(r, Q, n_lookahead): assert S.shape[0] == nx * (n_lookahead + 1) + nu * n_lookahead assert cost_action.shape[0] == nu * n_lookahead assert cost_state.shape[0] == nx * (n_lookahead + 1) - - return S, cost_state, cost_action + return S, cost_state, cost_action diff --git a/safe_control_gym/controllers/pid/pid.py b/safe_control_gym/controllers/pid/pid.py index 7f32f9287..1c928d20c 100644 --- a/safe_control_gym/controllers/pid/pid.py +++ b/safe_control_gym/controllers/pid/pid.py @@ -147,8 +147,8 @@ def select_action(self, obs, info=None): action = self.KF * action**2 if self.env.QUAD_TYPE == 2: action = np.array([action[0] + action[3], action[1] + action[2]]) - elif self.env.QUAD_TYPE == 4: # 2D quadrotor with attitude control - action = np.array([self.env.attitude_control.pwm2thrust(thrust/3)*4, computed_target_rpy[1]]) + elif self.env.QUAD_TYPE == 4: # 2D quadrotor with attitude control + action = np.array([self.env.attitude_control.pwm2thrust(thrust / 3) * 4, computed_target_rpy[1]]) return action diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py index 0c82b13fd..98578cbbe 100644 --- a/safe_control_gym/controllers/ppo/ppo.py +++ b/safe_control_gym/controllers/ppo/ppo.py @@ -332,6 +332,7 @@ def log_step(self, { 'ep_length': ep_lengths.mean(), 'ep_return': ep_returns.mean(), + 'ep_return_std': 
ep_returns.std(), 'ep_reward': (ep_returns / ep_lengths).mean(), 'ep_constraint_violation': ep_constraint_violation.mean() }, diff --git a/safe_control_gym/controllers/sac/sac.py b/safe_control_gym/controllers/sac/sac.py index 3496fc1d7..fb9b78cc8 100644 --- a/safe_control_gym/controllers/sac/sac.py +++ b/safe_control_gym/controllers/sac/sac.py @@ -368,6 +368,7 @@ def log_step(self, results): { 'ep_length': ep_lengths.mean(), 'ep_return': ep_returns.mean(), + 'ep_return_std': ep_returns.std(), 'ep_reward': (ep_returns / ep_lengths).mean(), 'ep_constraint_violation': ep_constraint_violation.mean() }, @@ -387,6 +388,7 @@ def log_step(self, results): { 'ep_length': eval_ep_lengths.mean(), 'ep_return': eval_ep_returns.mean(), + 'ep_return_std': eval_ep_returns.std(), 'ep_reward': (eval_ep_returns / eval_ep_lengths).mean(), 'constraint_violation': eval_constraint_violation.mean(), 'mse': eval_mse.mean() diff --git a/safe_control_gym/controllers/sac/sac_utils.py b/safe_control_gym/controllers/sac/sac_utils.py index 3e2e73e33..d4bbdfb44 100644 --- a/safe_control_gym/controllers/sac/sac_utils.py +++ b/safe_control_gym/controllers/sac/sac_utils.py @@ -149,7 +149,7 @@ def update(self, batch): # actor update policy_loss, entropy_loss = self.compute_policy_loss(batch) - if self.count%self.update_freq == 0: + if self.count % self.update_freq == 0: self.actor_opt.zero_grad() policy_loss.backward() self.actor_opt.step() @@ -166,7 +166,7 @@ def update(self, batch): self.critic_opt.step() # update target networks - if self.count%self.update_freq == 0: + if self.count % self.update_freq == 0: soft_update(self.ac, self.ac_targ, self.tau) self.count += 1 @@ -197,10 +197,10 @@ def __init__(self, obs_dim, act_dim, action_space, hidden_dims, activation, post # action rescaling (from cleanrl) self.register_buffer( - "action_scale", torch.tensor((action_space.high - action_space.low) / 2.0, dtype=torch.float32).flatten() + 'action_scale', torch.tensor((action_space.high - action_space.low) 
/ 2.0, dtype=torch.float32).flatten() ) self.register_buffer( - "action_bias", torch.tensor((action_space.high + action_space.low) / 2.0, dtype=torch.float32).flatten() + 'action_bias', torch.tensor((action_space.high + action_space.low) / 2.0, dtype=torch.float32).flatten() ) def forward(self, obs, deterministic=False, with_logprob=True): @@ -237,7 +237,7 @@ def forward(self, obs, deterministic=False, with_logprob=True): logp = logp.sum(1, keepdim=True) else: logp = None - + return action, logp diff --git a/safe_control_gym/controllers/td3/td3.yaml b/safe_control_gym/controllers/td3/td3.yaml index 6cecd6667..023a28e66 100644 --- a/safe_control_gym/controllers/td3/td3.yaml +++ b/safe_control_gym/controllers/td3/td3.yaml @@ -9,16 +9,12 @@ clip_reward: 10. # loss args gamma: 0.99 tau: 0.005 -init_temperature: 0.2 -use_entropy_tuning: False -target_entropy: null # optim args train_interval: 100 train_batch_size: 64 actor_lr: 0.001 critic_lr: 0.001 -entropy_lr: 0.001 # runner args max_env_steps: 1000000 diff --git a/safe_control_gym/envs/__init__.py b/safe_control_gym/envs/__init__.py index 673e01a74..aca3777ce 100644 --- a/safe_control_gym/envs/__init__.py +++ b/safe_control_gym/envs/__init__.py @@ -1,4 +1,4 @@ -'''Register environments.''' +"""Register environments.""" from safe_control_gym.utils.registration import register @@ -16,4 +16,4 @@ register(idx='shower', entry_point='safe_control_gym.envs.test_shower.shower:ShowerEnv', - config_entry_point='safe_control_gym.envs.test_shower:shower.yaml') \ No newline at end of file + config_entry_point='safe_control_gym.envs.test_shower:shower.yaml') diff --git a/safe_control_gym/envs/benchmark_env.py b/safe_control_gym/envs/benchmark_env.py index 00b069368..33bda0faa 100644 --- a/safe_control_gym/envs/benchmark_env.py +++ b/safe_control_gym/envs/benchmark_env.py @@ -478,10 +478,10 @@ def after_step(self, obs, rew, done, info): info (dict): The info after this step. 
Returns: - obs (ndarray): The udpdated observation after this step. - rew (float): The udpdated reward after this step. + obs (ndarray): The updated observation after this step. + rew (float): The updated reward after this step. done (bool): Whether the evaluation is done. - info (dict): The udpdated info after this step. + info (dict): The updated info after this step. """ # Increment counters self.pyb_step_counter += self.PYB_STEPS_PER_CTRL diff --git a/safe_control_gym/envs/constraints.py b/safe_control_gym/envs/constraints.py index 89a40b06b..1a8d82b5e 100644 --- a/safe_control_gym/envs/constraints.py +++ b/safe_control_gym/envs/constraints.py @@ -420,9 +420,9 @@ def __init__(self, constrained_variable (ConstrainedVariableType): Specifies the input type to the constraint as a constraint that acts on the state, input, or both. bound (list, np.array): 1D array or list of the bounds. Length must match - the environemt observation space dimension. If none, the env defaults are used + the environment observation space dimension. If none, the env defaults are used strict (optional, bool): Whether the constraint is violated also when equal to its threshold. - active_dims (list of ints): Filters the constraint to only act on select certian dimensions. + active_dims (list of ints): Filters the constraint to only act on select certain dimensions. tolerance (list or np.array): The distance from the constraint at which is_almost_active returns True. decimals (optional, int): Specifies the number of decimal places to round the constraint evaluation too. ''' @@ -649,7 +649,7 @@ def create_constraint_list(constraint_specs, available_constraints, env): '''Creates a ConstraintList from yaml constraint specification. Args: - constraint_specs (list): List of dicts defining the constraints info. + constraint_specs (list): List of dicts defining the constraints' info. 
available_constraints (dict): Dict of the constraints that are available env (BenchmarkEnv): The environment for which the constraints will be applied ''' diff --git a/safe_control_gym/envs/gym_control/cartpole.py b/safe_control_gym/envs/gym_control/cartpole.py index c89f1eaef..86ebe8c95 100644 --- a/safe_control_gym/envs/gym_control/cartpole.py +++ b/safe_control_gym/envs/gym_control/cartpole.py @@ -420,7 +420,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): Ur = cs.MX.sym('Ur', nu, 1) cost_func = 0.5 * (X - Xr).T @ Q @ (X - Xr) + 0.5 * (U - Ur).T @ R @ (U - Ur) # Define dynamics and cost dictionaries. - dynamics = {'dyn_eqn': X_dot, 'obs_eqn': Y, 'vars': {'X': X, 'U': U},} + dynamics = {'dyn_eqn': X_dot, 'obs_eqn': Y, 'vars': {'X': X, 'U': U}, } cost = {'cost_func': cost_func, 'vars': {'X': X, 'U': U, 'Xr': Xr, 'Ur': Ur, 'Q': Q, 'R': R}} # Additional params to cache params = { @@ -454,10 +454,10 @@ def _set_observation_space(self): self.x_dot_threshold = 10 self.theta_dot_threshold = 10 # Limit set to 2x: i.e. a failing observation is still within bounds. 
- obs_bound = np.array([self.x_threshold * 2, - self.x_dot_threshold, #np.finfo(np.float32).max, - self.theta_threshold_radians * 2, - self.theta_dot_threshold]) # np.finfo(np.float32).max + obs_bound = np.array([self.x_threshold * 2, + self.x_dot_threshold, # np.finfo(np.float32).max, + self.theta_threshold_radians * 2, + self.theta_dot_threshold]) # np.finfo(np.float32).max self.state_space = spaces.Box(low=-obs_bound, high=obs_bound, dtype=np.float32) # Concatenate goal info for RL diff --git a/safe_control_gym/envs/gym_pendulum/pendulum.py b/safe_control_gym/envs/gym_pendulum/pendulum.py index 91036b3dd..9ae809fad 100644 --- a/safe_control_gym/envs/gym_pendulum/pendulum.py +++ b/safe_control_gym/envs/gym_pendulum/pendulum.py @@ -292,7 +292,7 @@ def reset(self, seed=None, init_state=None): self.OVERRIDDEN_POLE_MASS = prop_values['pole_mass'] # See `slender rod`, https://en.wikipedia.org/wiki/List_of_moments_of_inertia. # OVERRIDDEN_POLE_INERTIA = (1 / 12) * self.OVERRIDDEN_POLE_MASS * (2 * self.OVERRIDDEN_EFFECTIVE_POLE_LENGTH)**2 - OVERRIDDEN_POLE_INERTIA = (1 / 3) * self.OVERRIDDEN_POLE_MASS * self.OVERRIDDEN_EFFECTIVE_POLE_LENGTH**2 # pole mass at the end of the rod + OVERRIDDEN_POLE_INERTIA = (1 / 3) * self.OVERRIDDEN_POLE_MASS * self.OVERRIDDEN_EFFECTIVE_POLE_LENGTH**2 # pole mass at the end of the rod # Load the cartpole with new urdf. override_urdf_tree = self._create_urdf(self.URDF_PATH, length=self.OVERRIDDEN_EFFECTIVE_POLE_LENGTH, inertia=OVERRIDDEN_POLE_INERTIA) # self.override_path = os.path.join(self.output_dir, f'pid-{os.getpid()}_id-{self.idx}_cartpole.urdf') @@ -306,7 +306,7 @@ def reset(self, seed=None, init_state=None): # Remove cache file after loading it into PyBullet. os.remove(self.override_path) # Pendulum settings. - # for link_idx in [-1, 0, 1]: # Slider, cart, and pole. # why is slider -1? + # for link_idx in [-1, 0, 1]: # Slider, cart, and pole. # why is slider -1? for link_idx in [-1, 0]: # fixed cart and pole. 
p.changeDynamics(self.PENDULUM_ID, linkIndex=link_idx, linearDamping=0, angularDamping=0, physicsClientId=self.PYB_CLIENT) # for joint_idx in [0, 1]: # Slider-to-cart and cart-to-pole joints. @@ -438,7 +438,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): Xr = cs.MX.sym('Xr', nx, 1) Ur = cs.MX.sym('Ur', nu, 1) cost_func = 0.5 * (self.wrap_sym(X) - Xr).T @ Q @ (self.wrap_sym(X) - Xr) \ - + 0.5 * (U - Ur).T @ R @ (U - Ur) + + 0.5 * (U - Ur).T @ R @ (U - Ur) # cost_func = self.cost_func(X, U, Xr, Ur, Q, R) # Define dynamics and cost dictionaries. # dynamics = {'dyn_eqn': X_dot, 'obs_eqn': Y, 'vars': {'X': X, 'U': U}} @@ -455,7 +455,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): } # Setup symbolic model. self.symbolic = SymbolicModel(dynamics=dynamics, cost=cost, dt=dt, params=params) - + def wrap_sym(self, X): '''Wrap angle to [-pi, pi] when used in observation. @@ -680,7 +680,7 @@ def _get_reward(self): if self.TASK == Task.STABILIZATION: return float( -1 * self.symbolic.loss(x=self.state, - # -1 * self.symbolic.loss(x=state, + # -1 * self.symbolic.loss(x=state, Xr=self.X_GOAL, u=self.current_clipped_action, Ur=self.U_GOAL, @@ -714,7 +714,7 @@ def _get_done(self): # x, _, theta, _ = self.state theta, _ = self.state # if x < -self.x_threshold or x > self.x_threshold or theta < -self.theta_threshold_radians or theta > self.theta_threshold_radians: - if theta < -self.theta_threshold_radians or theta > self.theta_threshold_radians: + if theta < -self.theta_threshold_radians or theta > self.theta_threshold_radians: self.out_of_bounds = True return True self.out_of_bounds = False @@ -770,14 +770,14 @@ def _parse_urdf_parameters(self, file_name): ''' URDF_TREE = (etxml.parse(file_name)).getroot() # EFFECTIVE_POLE_LENGTH = 0.5 * float(URDF_TREE[3][0][0][0].attrib['size'].split(' ')[-1]) # Note: HALF length of pole. 
- # POLE_MASS = float(URDF_TREE[3][1][1].attrib['value']) - # CART_MASS = float(URDF_TREE[1][2][0].attrib['value']) - # return EFFECTIVE_POLE_LENGTH, POLE_MASS, CART_MASS - EFFECTIVE_POLE_LENGTH = float(URDF_TREE[1][0][0][0].attrib['size'].split(' ')[-1]) # Note: full length of pole. + # POLE_MASS = float(URDF_TREE[3][1][1].attrib['value']) + # CART_MASS = float(URDF_TREE[1][2][0].attrib['value']) + # return EFFECTIVE_POLE_LENGTH, POLE_MASS, CART_MASS + EFFECTIVE_POLE_LENGTH = float(URDF_TREE[1][0][0][0].attrib['size'].split(' ')[-1]) # Note: full length of pole. POLE_MASS = float(URDF_TREE[1][1][1].attrib['value']) - # print('pole length:', EFFECTIVE_POLE_LENGTH) - # print('pole mass:', POLE_MASS) - # print(URDF_TREE[3][0][0][0].attrib['size'].split(' ')[-1]) + # print('pole length:', EFFECTIVE_POLE_LENGTH) + # print('pole mass:', POLE_MASS) + # print(URDF_TREE[3][0][0][0].attrib['size'].split(' ')[-1]) return EFFECTIVE_POLE_LENGTH, POLE_MASS def _create_urdf(self, file_name, length=None, inertia=None): diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py index dc1494b0a..a6328d783 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -15,7 +15,8 @@ from safe_control_gym.envs.benchmark_env import Cost, Task from safe_control_gym.envs.constraints import GENERAL_CONSTRAINTS from safe_control_gym.envs.gym_pybullet_drones.base_aviary import BaseAviary -from safe_control_gym.envs.gym_pybullet_drones.quadrotor_utils import QuadType, AttitudeControl, cmd2pwm, pwm2rpm +from safe_control_gym.envs.gym_pybullet_drones.quadrotor_utils import (AttitudeControl, QuadType, cmd2pwm, + pwm2rpm) from safe_control_gym.math_and_models.symbolic_systems import SymbolicModel from safe_control_gym.math_and_models.transformations import csRotXYZ, transform_trajectory @@ -166,11 +167,11 @@ def __init__(self, inertial_prop (ndarray, optional): 
The inertial properties of the environment (M, Ixx, Iyy, Izz). quad_type (QuadType, optional): The choice of motion type (1D along z, 2D in the x-z plane, or 3D). norm_act_scale (float): Scaling the [-1,1] action space around hover thrust when `normalized_action_space` is True. - obs_goal_horizon (int): How many future goal states to append to obervation. + obs_goal_horizon (int): How many future goal states to append to observation. rew_state_weight (list/ndarray): Quadratic weights for state in rl reward. rew_act_weight (list/ndarray): Quadratic weights for action in rl reward. - rew_exponential (bool): If to exponentiate negative quadratic cost to positive, bounded [0,1] reward. - done_on_out_of_bound (bool): If to termiante when state is out of bound. + rew_exponential (bool): If to exponential negative quadratic cost to positive, bounded [0,1] reward. + done_on_out_of_bound (bool): If to terminate when state is out of bound. info_mse_metric_state_weight (list/ndarray): Quadratic weights for state in mse calculation for info dict. """ @@ -505,12 +506,12 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): prior_prop (dict): specify the prior inertial prop to use in the symbolic model. """ # if self.QUAD_TYPE is QuadType.TWO_D_ATTITUDE: - # params_pitch_rate = prior_prop.get('params_pitch_rate', + # params_pitch_rate = prior_prop.get('params_pitch_rate', # params_acc = # else: m = prior_prop.get('M', self.MASS) Iyy = prior_prop.get('Iyy', self.J[1, 1]) - + g, length = self.GRAVITY_ACC, self.L dt = self.CTRL_TIMESTEP # Define states. @@ -558,8 +559,8 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): theta_dot = cs.MX.sym('theta_dot') X = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot) # Define input collective thrust and theta. 
- T = cs.MX.sym('T_c') # normlized thrust [N] - P = cs.MX.sym('P_c') # desired pitch angle [rad] + T = cs.MX.sym('T_c') # normlized thrust [N] + P = cs.MX.sym('P_c') # desired pitch angle [rad] U = cs.vertcat(T, P) # The thrust in PWM is converted from the normalized thrust. # With the formulat F_desired = b_F * T + a_F @@ -693,13 +694,13 @@ def _set_action_space(self): a_low = self.KF * n_mot * (self.PWM2RPM_SCALE * self.MIN_PWM + self.PWM2RPM_CONST)**2 a_high = self.KF * n_mot * (self.PWM2RPM_SCALE * self.MAX_PWM + self.PWM2RPM_CONST)**2 self.physical_action_bounds = (np.array([np.full(1, a_low, np.float32), np.full(1, -max_pitch_rad, np.float32)]).flatten(), - np.array([np.full(1, a_high, np.float32), np.full(1, max_pitch_rad, np.float32)]).flatten()) + np.array([np.full(1, a_high, np.float32), np.full(1, max_pitch_rad, np.float32)]).flatten()) else: n_mot = 4 / action_dim a_low = self.KF * n_mot * (self.PWM2RPM_SCALE * self.MIN_PWM + self.PWM2RPM_CONST)**2 a_high = self.KF * n_mot * (self.PWM2RPM_SCALE * self.MAX_PWM + self.PWM2RPM_CONST)**2 self.physical_action_bounds = (np.full(action_dim, a_low, np.float32), - np.full(action_dim, a_high, np.float32)) + np.full(action_dim, a_high, np.float32)) if self.NORMALIZED_RL_ACTION_SPACE: # Normalized thrust (around hover thrust). @@ -708,13 +709,13 @@ def _set_action_space(self): else: self.hover_thrust = self.GRAVITY_ACC * self.MASS / action_dim self.action_space = spaces.Box(low=-np.ones(action_dim), - high=np.ones(action_dim), - dtype=np.float32) + high=np.ones(action_dim), + dtype=np.float32) else: # Direct thrust control. 
self.action_space = spaces.Box(low=self.physical_action_bounds[0], - high=self.physical_action_bounds[1], - dtype=np.float32) + high=self.physical_action_bounds[1], + dtype=np.float32) def _set_observation_space(self): """Sets the observation space of the environment.""" @@ -748,7 +749,7 @@ def _set_observation_space(self): high = np.array([ self.x_threshold, self.x_dot_threshold, self.z_threshold, self.z_dot_threshold, - self.theta_threshold_radians, self.theta_dot_threshold_radians + self.theta_threshold_radians, self.theta_dot_threshold_radians ]) self.STATE_LABELS = ['x', 'x_dot', 'z', 'z_dot', 'theta', 'theta_dot'] self.STATE_UNITS = ['m', 'm/s', 'm', 'm/s', 'rad', 'rad/s'] @@ -831,19 +832,19 @@ def _preprocess_control(self, action): if self.adversary_disturbance == 'action': self.current_physical_action = self.current_physical_action + self.adv_action self.current_noisy_physical_action = self.current_physical_action - + if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE: collective_thrust, pitch = action - # rpm = self.attitude_control._dslPIDAttitudeControl(indivisual_thrust, + # rpm = self.attitude_control._dslPIDAttitudeControl(individual_thrust, # self.quat[0], np.array([0, pitch, 0])) # input thrsut is pwm # thrust_action = self.KF * rpm**2 - # thrust_action = self.attitude_control._dslPIDAttitudeControl(self.attitude_control.pwm2thrust(thrust_c/3), + # thrust_action = self.attitude_control._dslPIDAttitudeControl(self.attitude_control.pwm2thrust(thrust_c/3), # self.quat[0], np.array([0, pitch, 0])) # input thrsut is in Newton # print(f"collective_thrust: {collective_thrust}, pitch: {pitch}") - thrust_action = self.attitude_control._dslPIDAttitudeControl(collective_thrust/4, - self.quat[0], np.array([0, pitch, 0])) # input thrsut is in Newton + thrust_action = self.attitude_control._dslPIDAttitudeControl(collective_thrust / 4, + self.quat[0], np.array([0, pitch, 0])) # input thrsut is in Newton thrust = np.array([thrust_action[0] + thrust_action[3], 
thrust_action[1] + thrust_action[2]]) - thrust = np.clip(thrust, np.full(2, self.physical_action_bounds[0][0]/2), np.full(2, self.physical_action_bounds[1][0]/2)) + thrust = np.clip(thrust, np.full(2, self.physical_action_bounds[0][0] / 2), np.full(2, self.physical_action_bounds[1][0] / 2)) pitch = np.clip(pitch, self.physical_action_bounds[0][1], self.physical_action_bounds[1][1]) self.current_clipped_action = np.array([sum(thrust), pitch]) else: @@ -853,7 +854,7 @@ def _preprocess_control(self, action): # convert to quad motor rpm commands pwm = cmd2pwm(thrust, self.PWM2RPM_SCALE, self.PWM2RPM_CONST, self.KF, self.MIN_PWM, self.MAX_PWM) rpm = pwm2rpm(pwm, self.PWM2RPM_SCALE, self.PWM2RPM_CONST) - + return rpm def normalize_action(self, action): @@ -890,7 +891,7 @@ def denormalize_action(self, action): # hover_pwm = (self.HOVER_RPM - self.PWM2RPM_CONST) / self.PWM2RPM_SCALE # thrust = np.where(thrust <= 0, self.MIN_PWM + (thrust + 1) * (hover_pwm - self.MIN_PWM), # hover_pwm + (self.MAX_PWM - hover_pwm) * thrust) - + thrust = (1 + self.norm_act_scale * action[0]) * self.hover_thrust # thrust = self.attitude_control.thrust2pwm(thrust) diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py index e33346cc8..5669ac144 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor_utils.py @@ -1,7 +1,7 @@ -'''Helper functions for the quadrotor environment.''' +"""Helper functions for the quadrotor environment.""" -from enum import IntEnum from abc import ABC +from enum import IntEnum import numpy as np import pybullet as p @@ -9,7 +9,7 @@ class QuadType(IntEnum): - '''Quadrotor types numeration class.''' + """Quadrotor types numeration class.""" ONE_D = 1 # One-dimensional (along z) movement. TWO_D = 2 # Two-dimensional (in the x-z plane) movement. 
@@ -18,7 +18,7 @@ class QuadType(IntEnum): def cmd2pwm(thrust, pwm2rpm_scale, pwm2rpm_const, ct, pwm_min, pwm_max): - '''Generic cmd to pwm function. + """Generic cmd to pwm function. For 1D, thrust is the total of all 4 motors; for 2D, 1st thrust is total of motor 1 & 4, 2nd thrust is total of motor 2 & 3; for 4D, thrust is thrust of each motor. @@ -33,7 +33,7 @@ def cmd2pwm(thrust, pwm2rpm_scale, pwm2rpm_const, ct, pwm_min, pwm_max): Returns: ndarray: array of length 4 containing PWM. - ''' + """ n_motor = 4 // int(thrust.size) thrust = np.clip(thrust, np.zeros_like(thrust), None) # Make sure thrust is not negative. motor_pwm = (np.sqrt(thrust / n_motor / ct) - pwm2rpm_const) / pwm2rpm_scale @@ -50,7 +50,7 @@ def cmd2pwm(thrust, pwm2rpm_scale, pwm2rpm_const, ct, pwm_min, pwm_max): def pwm2rpm(pwm, pwm2rpm_scale, pwm2rpm_const): - '''Computes motor squared rpm from pwm. + """Computes motor squared rpm from pwm. Args: pwm (ndarray): Array of length 4 containing PWM. @@ -59,13 +59,13 @@ def pwm2rpm(pwm, pwm2rpm_scale, pwm2rpm_const): Returns: ndarray: Array of length 4 containing RPMs. - ''' + """ rpm = pwm2rpm_scale * pwm + pwm2rpm_const return rpm class AttitudeControl(ABC): - '''AttitudeControl Class.''' + """AttitudeControl Class.""" def __init__(self, control_timestep, @@ -80,7 +80,7 @@ def __init__(self, min_pwm: float = 20000, max_pwm: float = 65535, ): - '''AttitudeControl class __init__ method. + """AttitudeControl class __init__ method. Args: control_timestep (float): The time step at which control is computed. @@ -94,8 +94,8 @@ def __init__(self, pwm2rpm_const (float, optional): PWM-to-RPM constant factor. min_pwm (float, optional): Minimum PWM. max_pwm (float, optional): Maximum PWM. 
- ''' - + """ + self.g = g self.KF = kf self.KM = km @@ -118,7 +118,7 @@ def __init__(self, self.control_timestep = control_timestep def reset(self): - '''Reinitialize just the controller before a new run.''' + """Reinitialize just the controller before a new run.""" # Clear PID control variables. self.last_rpy = np.zeros(3) @@ -130,47 +130,47 @@ def _dslPIDAttitudeControl(self, target_euler, target_rpy_rates=np.zeros(3) ): - """DSL's CF2.x PID attitude control. - - Parameters - ---------- - thrust : ndarray - (4,1)-shaped array of target thrust (Newton) along the drone z-axis. - cur_quat : ndarray - (4,1)-shaped array of floats containing the current orientation as a quaternion. - target_euler : ndarray - (3,1)-shaped array of floats containing the computed target Euler angles. - target_rpy_rates : ndarray - (3,1)-shaped array of floats containing the desired roll, pitch, and yaw rates. - - Returns - ------- - ndarray - (4,1)-shaped array of integers containing the RPMs to apply to each of the 4 motors. - - """ - cur_rotation = np.array(p.getMatrixFromQuaternion(cur_quat)).reshape(3, 3) - cur_rpy = np.array(p.getEulerFromQuaternion(cur_quat)) - target_quat = (Rotation.from_euler('XYZ', target_euler, degrees=False)).as_quat() - w, x, y, z = target_quat - target_rotation = (Rotation.from_quat([w, x, y, z])).as_matrix() - rot_matrix_e = np.dot((target_rotation.transpose()), cur_rotation) - np.dot(cur_rotation.transpose(), target_rotation) - rot_e = np.array([rot_matrix_e[2, 1], rot_matrix_e[0, 2], rot_matrix_e[1, 0]]) - rpy_rates_e = target_rpy_rates - (cur_rpy - self.last_rpy) / self.control_timestep - self.last_rpy = cur_rpy - self.integral_rpy_e = self.integral_rpy_e - rot_e * self.control_timestep - self.integral_rpy_e = np.clip(self.integral_rpy_e, -1500., 1500.) - self.integral_rpy_e[0:2] = np.clip(self.integral_rpy_e[0:2], -1., 1.) 
- #### PID target torques #################################### - target_torques = - np.multiply(self.P_COEFF_TOR, rot_e) \ - + np.multiply(self.D_COEFF_TOR, rpy_rates_e) \ - + np.multiply(self.I_COEFF_TOR, self.integral_rpy_e) - target_torques = np.clip(target_torques, -3200, 3200) - # pwm = thrust + np.dot(self.MIXER_MATRIX, target_torques) - # pwm = np.clip(pwm, self.MIN_PWM, self.MAX_PWM) - # return self.PWM2RPM_SCALE * pwm + self.PWM2RPM_CONST - return thrust + self.pwm2thrust(np.dot(self.MIXER_MATRIX, target_torques)) - + """DSL's CF2.x PID attitude control. + + Parameters + ---------- + thrust : ndarray + (4,1)-shaped array of target thrust (Newton) along the drone z-axis. + cur_quat : ndarray + (4,1)-shaped array of floats containing the current orientation as a quaternion. + target_euler : ndarray + (3,1)-shaped array of floats containing the computed target Euler angles. + target_rpy_rates : ndarray + (3,1)-shaped array of floats containing the desired roll, pitch, and yaw rates. + + Returns + ------- + ndarray + (4,1)-shaped array of integers containing the RPMs to apply to each of the 4 motors. + + """ + cur_rotation = np.array(p.getMatrixFromQuaternion(cur_quat)).reshape(3, 3) + cur_rpy = np.array(p.getEulerFromQuaternion(cur_quat)) + target_quat = (Rotation.from_euler('XYZ', target_euler, degrees=False)).as_quat() + w, x, y, z = target_quat + target_rotation = (Rotation.from_quat([w, x, y, z])).as_matrix() + rot_matrix_e = np.dot((target_rotation.transpose()), cur_rotation) - np.dot(cur_rotation.transpose(), target_rotation) + rot_e = np.array([rot_matrix_e[2, 1], rot_matrix_e[0, 2], rot_matrix_e[1, 0]]) + rpy_rates_e = target_rpy_rates - (cur_rpy - self.last_rpy) / self.control_timestep + self.last_rpy = cur_rpy + self.integral_rpy_e = self.integral_rpy_e - rot_e * self.control_timestep + self.integral_rpy_e = np.clip(self.integral_rpy_e, -1500., 1500.) + self.integral_rpy_e[0:2] = np.clip(self.integral_rpy_e[0:2], -1., 1.) 
+ #### PID target torques #################################### + target_torques = - np.multiply(self.P_COEFF_TOR, rot_e) \ + + np.multiply(self.D_COEFF_TOR, rpy_rates_e) \ + + np.multiply(self.I_COEFF_TOR, self.integral_rpy_e) + target_torques = np.clip(target_torques, -3200, 3200) + # pwm = thrust + np.dot(self.MIXER_MATRIX, target_torques) + # pwm = np.clip(pwm, self.MIN_PWM, self.MAX_PWM) + # return self.PWM2RPM_SCALE * pwm + self.PWM2RPM_CONST + return thrust + self.pwm2thrust(np.dot(self.MIXER_MATRIX, target_torques)) + def pwm2thrust(self, pwm): """Convert pwm to thrust using a quadratic function.""" @@ -179,7 +179,7 @@ def pwm2thrust(self, pwm): # solve quadratic equation using abc formula thrust = (-self.b_coeff + np.sqrt(self.b_coeff**2 - 4 * self.a_coeff * (self.c_coeff - pwm_scaled))) / (2 * self.a_coeff) return thrust - + def thrust2pwm(self, thrust): """Convert thrust to pwm using a quadratic function.""" @@ -188,5 +188,3 @@ def thrust2pwm(self, thrust): pwm = np.minimum(pwm, 1.0) thrust_pwm = pwm * self.MAX_PWM return thrust_pwm - - diff --git a/safe_control_gym/experiments/base_experiment.py b/safe_control_gym/experiments/base_experiment.py index 5a6df5ed9..ece40bcdd 100644 --- a/safe_control_gym/experiments/base_experiment.py +++ b/safe_control_gym/experiments/base_experiment.py @@ -135,7 +135,7 @@ def _execute_evaluations(self, n_episodes=None, n_steps=None, done_on_max_steps= self.env.save_data() obs, info = self._evaluation_reset(ctrl_data=ctrl_data, sf_data=sf_data) break - # elif + # elif # otherwise, keep stepping elif n_steps is not None: @@ -477,7 +477,7 @@ def get_episode_returns(self, exponentiate=False): episode_rewards (list): The total reward of each episode. ''' return self.get_episode_data('reward', postprocess_func=sum, exponentiate=exponentiate) - + def get_episode_exponentiated_returns(self): '''Total reward/return of episodes. 
diff --git a/safe_control_gym/lyapunov/lyapunov.py b/safe_control_gym/lyapunov/lyapunov.py deleted file mode 100644 index 59dab9b24..000000000 --- a/safe_control_gym/lyapunov/lyapunov.py +++ /dev/null @@ -1,1132 +0,0 @@ - -from collections.abc import Sequence -import itertools - -import numpy as np -import torch - -myDevice = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - -# Add the configuration settings -class Configuration(object): - """Configuration class.""" - - def __init__(self): - """Initialization.""" - super(Configuration, self).__init__() - - # Dtype for computations - self.dtype = torch.float32 - ####################################################################### - # Batch size for stability verification - # TODO: change this back to 10000 in the future (by Mingxuan) - self.gp_batch_size = 100 # originally 10000 - ####################################################################### - - @property - def np_dtype(self): - """Return the numpy dtype.""" - return np.float32 - - def __repr__(self): - """Print the parameters.""" - params = ['Configuration parameters:', ''] - for param, value in self.__dict__.items(): - params.append('{}: {}'.format(param, value.__repr__())) - - return '\n'.join(params) - -config = Configuration() -del Configuration -_EPS = np.finfo(config.np_dtype).eps - -class DimensionError(Exception): - pass - -class GridWorld(object): - """Base class for function approximators on a regular grid. - - Parameters - ---------- - limits: 2d array-like - A list of limits. For example, [(x_min, x_max), (y_min, y_max)] - num_points: 1d array-like - The number of points with which to grid each dimension. - - NOTE: in original Lyapunov NN, the grid is defined in a normalized - fashion (i.e. 
[-1, 1] for each dimension) - """ - - def __init__(self, limits, num_points): - """Initialization, see `GridWorld`.""" - super(GridWorld, self).__init__() - - self.limits = np.atleast_2d(limits).astype(config.np_dtype) - num_points = np.broadcast_to(num_points, len(self.limits)) - self.num_points = num_points.astype(np.int16, copy=False) - self.state_dim = len(self.limits) - # print('self.state_dim: ', self.state_dim) - - if np.any(self.num_points < 2): - raise DimensionError('There must be at least 2 points in each ' - 'dimension.') - - # Compute offset and unit hyperrectangle - self.offset = self.limits[:, 0] - self.unit_maxes = ((self.limits[:, 1] - self.offset) - / (self.num_points - 1)).astype(config.np_dtype) - self.offset_limits = np.stack((np.zeros_like(self.limits[:, 0]), - self.limits[:, 1] - self.offset), - axis=1) - - # Statistics about the grid - self.discrete_points = [np.linspace(low, up, n, dtype=config.np_dtype) - for (low, up), n in zip(self.limits, - self.num_points)] - - self.nrectangles = np.prod(self.num_points - 1) - self.nindex = np.prod(self.num_points) - - self.ndim = len(self.limits) - self._all_points = None - - @property - def all_points(self): - """Return all the discrete points of the discretization. - - Returns - ------- - points : ndarray - An array with all the discrete points with size - (self.nindex, self.ndim). 
- - """ - if self._all_points is None: - # my own implementation - mesh = np.stack(np.meshgrid(*self.discrete_points),-1).reshape(-1,self.state_dim) - self._all_points = mesh.astype(config.np_dtype) - # if self.all_points.shape[1] == 2: - # swap the first two columns - # self._all_points[:,[0,1]] = self._all_points[:,[1,0]] - - # original implementation - # mesh = np.meshgrid(*self.discrete_points, indexing='ij') - # points = np.column_stack(col.ravel() for col in mesh) - # each row of the mesh is a point in the stat space - # self._all_points = points.astype(config.np_dtype) - - return self._all_points - - def __len__(self): - """Return the number of points in the discretization.""" - return self.nindex - - def sample_continuous(self, num_samples): - """Sample uniformly at random from the continuous domain. - - Parameters - ---------- - num_samples : int - - Returns - ------- - points : ndarray - Random points on the continuous rectangle. - - """ - limits = self.limits - rand = np.random.uniform(0, 1, size=(num_samples, self.ndim)) - return rand * np.diff(limits, axis=1).T + self.offset - - def sample_discrete(self, num_samples, replace=False): - """Sample uniformly at random from the discrete domain. - - Parameters - ---------- - num_samples : int - replace : bool, optional - Whether to sample with replacement. - - Returns - ------- - points : ndarray - Random points on the continuous rectangle. - - """ - idx = np.random.choice(self.nindex, size=num_samples, replace=replace) - return self.index_to_state(idx) - - def _check_dimensions(self, states): - """Raise an error if the states have the wrong dimension. - - Parameters - ---------- - states : ndarray - - """ - if not states.shape[1] == self.ndim: - raise DimensionError('the input argument has the wrong ' - 'dimensions.') - - def _center_states(self, states, clip=True): - """Center the states to the interval [0, x]. 
- - Parameters - ---------- - states : np.array - clip : bool, optinal - If False the data is not clipped to lie within the limits. - - Returns - ------- - offset_states : ndarray - - """ - states = np.atleast_2d(states).astype(config.np_dtype) - states = states - self.offset[None, :] - if clip: - np.clip(states, - self.offset_limits[:, 0] + 2 * _EPS, - self.offset_limits[:, 1] - 2 * _EPS, - out=states) - return states - - def index_to_state(self, indices): - """Convert indices to physical states. - - Parameters - ---------- - indices : ndarray (int) - The indices of points on the discretization. - - Returns - ------- - states : ndarray - The states with physical units that correspond to the indices. - - """ - indices = np.atleast_1d(indices) - ijk_index = np.vstack(np.unravel_index(indices, self.num_points)).T - ijk_index = ijk_index.astype(config.np_dtype) - return ijk_index * self.unit_maxes + self.offset - - def state_to_index(self, states): - """Convert physical states to indices. - - Parameters - ---------- - states: ndarray - Physical states on the discretization. - - Returns - ------- - indices: ndarray (int) - The indices that correspond to the physical states. - - """ - states = np.atleast_2d(states) - self._check_dimensions(states) - states = np.clip(states, self.limits[:, 0], self.limits[:, 1]) - states = (states - self.offset) * (1. / self.unit_maxes) - ijk_index = np.rint(states).astype(np.int32) - return np.ravel_multi_index(ijk_index.T, self.num_points) - - def state_to_rectangle(self, states): - """Convert physical states to its closest rectangle index. - - Parameters - ---------- - states : ndarray - Physical states on the discretization. - - Returns - ------- - rectangles : ndarray (int) - The indices that correspond to rectangles of the physical states. 
- - """ - ind = [] - for i, (discrete, num_points) in enumerate(zip(self.discrete_points, - self.num_points)): - idx = np.digitize(states[:, i], discrete) - idx -= 1 - np.clip(idx, 0, num_points - 2, out=idx) - - ind.append(idx) - return np.ravel_multi_index(ind, self.num_points - 1) - - def rectangle_to_state(self, rectangles): - """ - Convert rectangle indices to the states of the bottem-left corners. - - Parameters - ---------- - rectangles : ndarray (int) - The indices of the rectangles - - Returns - ------- - states : ndarray - The states that correspond to the bottom-left corners of the - corresponding rectangles. - - """ - rectangles = np.atleast_1d(rectangles) - ijk_index = np.vstack(np.unravel_index(rectangles, - self.num_points - 1)) - ijk_index = ijk_index.astype(config.np_dtype) - return (ijk_index.T * self.unit_maxes) + self.offset - - def rectangle_corner_index(self, rectangles): - """Return the index of the bottom-left corner of the rectangle. - - Parameters - ---------- - rectangles: ndarray - The indices of the rectangles. - - Returns - ------- - corners : ndarray (int) - The indices of the bottom-left corners of the rectangles. - - """ - ijk_index = np.vstack(np.unravel_index(rectangles, - self.num_points - 1)) - return np.ravel_multi_index(np.atleast_2d(ijk_index), - self.num_points) - -class QuadraticFunction(object): - """A quadratic function. - - values(x) = x.T P x - - Parameters - ---------- - matrix : np.array - 2d cost matrix for lyapunov function. - - """ - def __init__(self, matrix): - """Initialization, see `QuadraticLyapunovFunction`.""" - super(QuadraticFunction, self).__init__() - - self.matrix = np.atleast_2d(matrix).astype(config.np_dtype) - # print('self.matrix\n',self.matrix) - self.ndim = self.matrix.shape[0] - # with tf.variable_scope(self.scope_name): - # self.matrix = tf.Variable(self.matrix) - - def __call__(self, *args, **kwargs): - """Evaluate the function using the template to ensure variable sharing. 
- - Parameters - ---------- - args : list - The input arguments to the function. - kwargs : dict, optional - The keyword arguments to the function. - - Returns - ------- - outputs : list - The output arguments of the function as given by evaluate. - - """ - - outputs = self.forward(*args, **kwargs) - return outputs - - def forward(self, points): - """Like evaluate, but returns a tensor instead.""" - if isinstance(points, np.ndarray): - points = torch.from_numpy(points).float() - # linear_form = tf.matmul(points, self.matrix) - # print('points\n', points) - # print('points shape\n', points.shape) - # print('points type\n', type(points)) - # convert points to np array - if isinstance(points, torch.Tensor): - # if the tensor is on GPU, convert it to CPU first - if points.is_cuda: - points = points.cpu() - points = points.detach().numpy() - points = np.reshape(points, [-1]) - # print('points\n', points) - # reshape points to 1d array - - linear_form = points @ self.matrix - quadratic = linear_form @ points.T - # return tf.reduce_sum(quadratic, axis=1, keepdims=True) - # print('quadratic\n',quadratic) - return torch.tensor(quadratic) - - def gradient(self, points): - """Return the gradient of the function.""" - if isinstance(points, np.ndarray): - points = torch.from_numpy(points).float() - # return tf.matmul(points, self.matrix + self.matrix.T) - return torch.matmul(torch.tensor(points, dtype=config.dtype), \ - torch.tensor(self.matrix + self.matrix.T, dtype=config.dtype)) - -class LyapunovNN(torch.nn.Module): - # def __init__(self, dim_input, layer_dims, activations): - def __init__(self, input_dim, layer_dims, activations, eps=1e-6, device='cpu'): - super(LyapunovNN, self).__init__() - # network layers - self.input_dim = input_dim - self.num_layers = len(layer_dims) - self.activations = activations - self.eps = eps - self.layers = torch.nn.ModuleList() - self.kernel = [] - self.device = device - - if layer_dims[0] < input_dim: - raise ValueError('The first layer 
dimension must be at \ - least the input dimension!') - - if np.all(np.diff(layer_dims) >= 0): - self.output_dims = layer_dims - else: - raise ValueError('Each layer must maintain or increase \ - the dimension of its input!') - - self.hidden_dims = np.zeros(self.num_layers, dtype=int) - for i in range(self.num_layers): - if i == 0: - layer_input_dim = self.input_dim - else: - layer_input_dim = self.output_dims[i - 1] - self.hidden_dims[i] = np.ceil((layer_input_dim + 1) / 2).astype(int) - - # # build the nn structure - # self.linear1 = torch.nn.Linear(2, 2, bias=False) - # self.linear2 = torch.nn.Linear(2, 62, bias=False) - # self.linear3 = torch.nn.Linear(64, 33, bias=False) - # self.linear4 = torch.nn.Linear(64, 33, bias=False) - # W1 = self.linear1.weight - # W2 = self.linear2.weight - # # print('W1.shape\n', W1.shape) - # # print('W2.shape\n', W2.shape) - # inter_kernel = torch.matmul(W1.T, W1) + self.eps * torch.eye(W1.shape[1]) - # self.kernel_1 = torch.cat((inter_kernel, W2), dim=0) - # W3 = self.linear3.weight - # self.kernel_2 = torch.matmul(W3.T, W3) + self.eps * torch.eye(W3.shape[1]) - # W4 = self.linear4.weight - # self.kernel_3 = torch.matmul(W4.T, W4) + self.eps * torch.eye(W4.shape[1]) - - # def forward(self, x): - # if isinstance(x, np.ndarray): - # x = torch.from_numpy(x).float() - # x = self.activations[0](torch.matmul(self.kernel_1, x)) - # x = self.activations[1](torch.matmul(self.kernel_2, x)) - # x = self.activations[2](torch.matmul(self.kernel_3, x)) - # x = torch.sum(torch.square(x)) - # return x - - # def update_kernel(self): - # # update the kernel - # W1 = self.linear1.weight - # W2 = self.linear2.weight - # inter_kernel = torch.matmul(W1.T, W1) + self.eps * torch.eye(W1.shape[1]) - # self.kernel_1 = torch.cat((inter_kernel, W2), dim=0) - # W3 = self.linear3.weight - # self.kernel_2 = torch.matmul(W3.T, W3) + self.eps * torch.eye(W3.shape[1]) - # W4 = self.linear4.weight - # self.kernel_3 = torch.matmul(W4.T, W4) + self.eps * 
torch.eye(W4.shape[1]) - - - # build the nn structure - for i in range(self.num_layers): - if i == 0: - layer_input_dim = self.input_dim - else: - layer_input_dim = self.output_dims[i - 1] - self.layers.append(\ - torch.nn.Linear(layer_input_dim, self.hidden_dims[i], bias=False)) - # W = self.layers[-1].weight - # weight = W.clone() - # weight = W - # kernel = torch.matmul(weight.T, weight) + self.eps * torch.eye(W.shape[1]) - # kernel = torch.matmul(W.T, W) + self.eps * torch.eye(W.shape[1]) - dim_diff = self.output_dims[i] - layer_input_dim - if dim_diff > 0: - self.layers.append(torch.nn.Linear(layer_input_dim, dim_diff, bias=False)) - # print(kernel.shape, self.layers[-1].weight.shape) - # kernel = torch.cat((kernel, self.layers[-1].weight), dim=0) - # self.kernel.append(kernel) - self.update_kernel() - - def forward(self, x): - if isinstance(x, np.ndarray): - x = torch.from_numpy(x).float() - # put the input to the device - x = x.to(self.device) - - for i in range(self.num_layers): - # print('self.kernel[i].is_cuda\n', self.kernel[i].is_cuda) - # print('x.is_cuda\n', x.is_cuda) - layer_output = torch.matmul(self.kernel[i], x) - x = self.activations[i](layer_output) - values = torch.sum(torch.square(x), dim=-1) - return values - - def update_kernel(self): - self.kernel = [] # clear the kernel - param_idx = 0 # for skipping the extra layer parameters - for i in range(self.num_layers): - if i == 0: - layer_input_dim = self.input_dim - else: - layer_input_dim = self.output_dims[i - 1] - # build the positive definite part of the kernel - W = self.layers[i + param_idx].weight - weight = W.clone() - kernel = torch.matmul(weight.T, weight) + self.eps * torch.eye(W.shape[1]) - # if the kernel need extra part, append the parameters of the next layer - dim_diff = self.output_dims[i] - layer_input_dim - if dim_diff > 0: - kernel = torch.cat((kernel, self.layers[i+1].weight), dim=0) - param_idx += 1 - # print('i: ', i) - self.kernel.append(kernel) - - # def 
print_manual_kernel(self): - # print('Kernel 1:\n', self.kernel_1) - # print('Kernel 2:\n', self.kernel_2) - # print('Kernel 3:\n', self.kernel_3) - - # # print kernel eigenvalues - # eigvals, _ = np.linalg.eig(self.kernel_1[0:2, :].detach().numpy()) - # print('Eigenvalues of (W0.T*W0 + eps*I):', eigvals, '\n') - # eigvals, _ = np.linalg.eig(self.kernel_2.detach().numpy()) - # print('Eigenvalues of (W0.T*W0 + eps*I):', eigvals, '\n') - # eigvals, _ = np.linalg.eig(self.kernel_3.detach().numpy()) - # print('Eigenvalues of (W0.T*W0 + eps*I):', eigvals, '\n') - - def print_params(self): - offset = 0 - # get nn parameters - params = [] - for _, param in self.named_parameters(): - params.append(param.data) - for i, dim_diff in enumerate(np.diff(np.concatenate([[self.input_dim], self.output_dims]))): - print('Layer weights {}:'.format(i)) - W0 = params[offset + i] - print('W0:\n{}'.format(W0)) - if dim_diff > 0: - W1 = params[offset + 1 + i] - print('W1:\n{}'.format(W1)) - else: - offset += 1 - kernel = W0.T.dot(W0) + self.eps * np.eye(W0.shape[1]) - eigvals, _ = np.linalg.eig(kernel) - print('Eigenvalues of (W0.T*W0 + eps*I):', eigvals, '\n') - -class Lyapunov(object): - """A class for general Lyapunov functions. - - Parameters - ---------- - discretization : ndarray - A discrete grid on which to evaluate the Lyapunov function. - lyapunov_function : callable or instance of `DeterministicFunction` - The lyapunov function. Can be called with states and returns the - corresponding values of the Lyapunov function. - dynamics : a callable or an instance of `Function` - The dynamics model. Can be either a deterministic function or something - uncertain that includes error bounds. - lipschitz_dynamics : ndarray or float - The Lipschitz constant of the dynamics. Either globally, or locally - for each point in the discretization (within a radius given by the - discretization constant. This is the closed-loop Lipschitz constant - including the policy! 
- lipschitz_lyapunov : ndarray or float - The Lipschitz constant of the lyapunov function. Either globally, or - locally for each point in the discretization (within a radius given by - the discretization constant. - tau : float - The discretization constant. - policy : ndarray, optional - The control policy used at each state (Same number of rows as the - discretization). - initial_set : ndarray, optional - A boolean array of states that are known to be safe a priori. - adaptive : bool, optional - A boolean determining whether an adaptive discretization is used for - stability verification. - - """ - - def __init__(self, discretization, lyapunov_function, dynamics, - lipschitz_dynamics, lipschitz_lyapunov, - tau, policy, initial_set=None, adaptive=False): - """Initialization, see `Lyapunov` for details.""" - super(Lyapunov, self).__init__() - - self.discretization = discretization - self.policy = policy - - # Keep track of the safe sets - self.safe_set = np.zeros(np.prod(discretization.num_points), - dtype=bool) - - self.initial_safe_set = initial_set - if initial_set is not None: - # print('initial safe set\n', initial_set) - # print('initial safe set shape\n', initial_set.shape) - # print('initial safe set type\n', type(initial_set)) - # print('self.safe_set\n', self.safe_set) - # print('self.safe_set shape\n', self.safe_set.shape) - # print('self.safe_set type\n', type(self.safe_set)) - self.safe_set[initial_set] = True - - # Discretization constant - self.tau = tau - - # Make sure dynamics are of standard framework - self.dynamics = dynamics - - # Make sure Lyapunov fits into standard framework - self.lyapunov_function = lyapunov_function - - # Storage for graph - self._storage = dict() - # self.feed_dict = get_feed_dict(tf.get_default_graph()) - - # Lyapunov values - self.values = None - - # self.c_max = tf.placeholder(config.dtype, shape=()) - self.c_max = None - # self.feed_dict[self.c_max] = 0. 
- - self._lipschitz_dynamics = lipschitz_dynamics - self._lipschitz_lyapunov = lipschitz_lyapunov - - self.update_values() - - self.adaptive = adaptive - - # Keep track of the refinement `N(x)` used around each state `x` in - # the adaptive discretization; `N(x) = 0` by convention if `x` is - # unsafe - self._refinement = np.zeros(discretization.nindex, dtype=int) - if initial_set is not None: - self._refinement[initial_set] = 1 - - def update_values(self): - """Update the discretized values when the Lyapunov function changes.""" - values = np.zeros(self.discretization.nindex) - for i in range(self.discretization.nindex): - # print('self.discretization.all_points[i]\n', self.discretization.all_points[i]) - # print('self.lyapunov_function(self.discretization.all_points[i]).squeeze(), \n', \ - # self.lyapunov_function(\ - # self.discretization.all_points[i]).squeeze()) - values[i] = self.lyapunov_function(\ - self.discretization.all_points[i]).squeeze() - self.values = values - - def update_safe_set(self, can_shrink=True, max_refinement=1, - safety_factor=1., parallel_iterations=1): - """Compute and update the safe set. - - Parameters - ---------- - can_shrink : bool, optional - A boolean determining whether previously safe states other than the - initial safe set must be verified again (i.e., can the safe set - shrink in volume?) - max_refinement : int, optional - The maximum integer divisor used for adaptive discretization. - safety_factor : float, optional - A multiplicative factor greater than 1 used to conservatively - estimate the required adaptive discretization. - parallel_iterations : int, optional - The number of parallel iterations to use for safety verification in - the adaptive case. Passed to `tf.map_fn`. - - """ - safety_factor = np.maximum(safety_factor, 1.) 
- - np_states = lambda x: np.array(x, dtype=config.dtype) - # decrease = lambda x: self.v_decrease_bound(x, self.dynamics(x, self.policy(x))) - decrease = lambda x: self.v_decrease_bound(x, self.dynamics(x)) - threshold = lambda x: self.threshold(x, self.tau) - np_negative = lambda x: np.squeeze(decrease(x) < threshold(x), axis=0) - - if can_shrink: - # Reset the safe set and adaptive discretization - safe_set = np.zeros_like(self.safe_set, dtype=bool) - refinement = np.zeros_like(self._refinement, dtype=int) - if self.initial_safe_set is not None: - safe_set[self.initial_safe_set] = True - refinement[self.initial_safe_set] = 1 - else: - # Assume safe set cannot shrink - safe_set = self.safe_set - refinement = self._refinement - - value_order = np.argsort(self.values) - safe_set = safe_set[value_order] - refinement = refinement[value_order] - - # Verify safety in batches - batch_size = config.gp_batch_size - batch_generator = batchify((value_order, safe_set, refinement), - batch_size) - # print('batch_generator\n', batch_generator.__dir__()) - # exit() - index_to_state = self.discretization.index_to_state - - ####################################################################### - - for i, (indices, safe_batch, refine_batch) in batch_generator: - # print('indices\n', indices) - # print('safe_batch\n', safe_batch) - # print('refine_batch\n', refine_batch) - # exit() - - states = index_to_state(indices) - np_state = np.squeeze(states) - # print('np_states in update safe set\n', np_state) - # print('np_states shape\n', np_state.shape) - # print('np_states type\n', type(np_state)) - - # Update the safety with the safe_batch result - # negative = tf_negative.eval(feed_dict) - # negative = np_negative(np_state) - negative = np.zeros_like(safe_batch, dtype=bool) - for state_index in range(len(np_state)): - negative[state_index] = np_negative(np_state[state_index]) - # convert negative to np array - negative = np.array(negative, dtype=bool) - # check data type - # 
print('negative\n', negative) - # print('negative shape\n', negative.shape) - # print('negative type\n', type(negative)) - # print('safe_batch\n', safe_batch) - # print('safe_batch shape\n', safe_batch.shape) - # print('safe_batch type\n', type(safe_batch)) - safe_batch |= negative - # exit() - refine_batch[negative] = 1 - - # Boolean array: argmin returns first element that is False - # If all are safe then it returns 0 - bound = np.argmin(safe_batch) - refine_bound = 0 - - # Check if there are unsafe elements in the batch - if bound > 0 or not safe_batch[0]: - safe_batch[bound:] = False - refine_batch[bound:] = 0 - break - - # The largest index of a safe value - max_index = i + bound + refine_bound - 1 - - ####################################################################### - - # Set placeholder for c_max to the corresponding value - self.c_max = self.values[value_order[max_index]] - - # Restore the order of the safe set and adaptive refinement - safe_nodes = value_order[safe_set] - self.safe_set[:] = False - self.safe_set[safe_nodes] = True - self._refinement[value_order] = refinement - - # Ensure the initial safe set is kept - if self.initial_safe_set is not None: - self.safe_set[self.initial_safe_set] = True - self._refinement[self.initial_safe_set] = 1 - - def threshold(self, states, tau=None): - """Return the safety threshold for the Lyapunov condition. - - Parameters - ---------- - states : ndarray or Tensor - - tau : float or Tensor, optional - Discretization constant to consider. - - Returns - ------- - lipschitz : float, ndarray or Tensor - Either the scalar threshold or local thresholds, depending on - whether lipschitz_lyapunov and lipschitz_dynamics are local or not. 
- - """ - if tau is None: - tau = self.tau - # if state is not a tensor, convert it to a tensor - if not isinstance(states, torch.Tensor): - states = torch.tensor(states, dtype=config.dtype, requires_grad=True) - states = states.float() - # print('states\n', states) - lv = self._lipschitz_lyapunov(states) - # print('lv\n', lv) - # print('lv shape\n', lv.shape) - # print('hasattr(self._lipschitz_lyapunov, __call__)\n', hasattr(self._lipschitz_lyapunov, '__call__')) - ## TODO: check this part (by Mingxuan) - # if hasattr(self._lipschitz_lyapunov, '__call__') and lv.shape[1] > 1: - # # lv = tf.norm(lv, ord=1, axis=1, keepdims=True) - # lv = torch.norm(lv, p=1, dim=1, keepdim=True) - # convert states to np array - if states.is_cuda: - states = states.cpu() - states = states.detach().numpy() - lf = self._lipschitz_dynamics(states) - return - lv * (1. + lf) * tau - - def v_decrease_bound(self, states, next_states): - """Compute confidence intervals for the decrease along Lyapunov function. - - Parameters - ---------- - states : np.array - The states at which to start (could be equal to discretization). - next_states : np.array or tuple - The dynamics evaluated at each point on the discretization. If - the dynamics are uncertain then next_states is a tuple with mean - and error bounds. - - Returns - ------- - upper_bound : np.array - The upper bound on the change in values at each grid point. - - """ - v_dot, v_dot_error = self.v_decrease_confidence(states, next_states) - - return v_dot + v_dot_error - - def v_decrease_confidence(self, states, next_states): - """Compute confidence intervals for the decrease along Lyapunov function. - - Parameters - ---------- - states : np.array - The states at which to start (could be equal to discretization). - next_states : np.array - The dynamics evaluated at each point on the discretization. If - the dynamics are uncertain then next_states is a tuple with mean - and error bounds. 
- - Returns - ------- - mean : np.array - The expected decrease in values at each grid point. - error_bounds : np.array - The error bounds for the decrease at each grid point - - """ - if isinstance(next_states, Sequence): - next_states, error_bounds = next_states - lv = self._lipschitz_lyapunov(next_states) - # bound = tf.reduce_sum(lv * error_bounds, axis=1, keepdims=True) - # bound = torch.sum(lv * error_bounds, dim=1, keepdim=True) - bound = np.sum(lv * error_bounds, axis=1, keepdims=True) - else: - # bound = tf.constant(0., dtype=config.dtype) - bound = torch.tensor(0., dtype=config.dtype) - if not isinstance(states, torch.Tensor): - states = torch.tensor(states, dtype=torch.float64) - states = states.float() # avoid feedforward data type error - # next_states is of type casadi.DM - # convert the next_states first to numpy array, then to torch tensor - if not isinstance(next_states, torch.Tensor): - next_states = torch.tensor(np.array(next_states), dtype=torch.float64) - next_states = next_states.float() # avoid feedforward data type error - # print('next_states\n', next_states) - # print('next_states shape\n', next_states.shape) - # print('next_states type\n', type(next_states)) - # print('next_states data type\n', next_states.dtype) - v_decrease = (self.lyapunov_function(next_states) - - self.lyapunov_function(states)) - - return v_decrease, bound - -# TODO: put this in a separate file (by Mingxuan) -def batchify(arrays, batch_size): - """Yield the arrays in batches and in order. - - The last batch might be smaller than batch_size. - - Parameters - ---------- - arrays : list of ndarray - The arrays that we want to convert to batches. - batch_size : int - The size of each individual batch. 
- """ - if not isinstance(arrays, (list, tuple)): - arrays = (arrays,) - - # Iterate over array in batches - for i, i_next in zip(itertools.count(start=0, step=batch_size), - itertools.count(start=batch_size, step=batch_size)): - - batches = [array[i:i_next] for array in arrays] - - # Break if there are no points left - if batches[0].size: - yield i, batches - else: - break - -class GridWorld_pendulum(object): - """Base class for function approximators on a regular grid. - - Parameters - ---------- - limits: 2d array-like - A list of limits. For example, [(x_min, x_max), (y_min, y_max)] - num_points: 1d array-like - The number of points with which to grid each dimension. - - NOTE: in original Lyapunov NN, the grid is defined in a normalized - fashion (i.e. [-1, 1] for each dimension) - """ - - def __init__(self, limits, num_points): - """Initialization, see `GridWorld`.""" - super(GridWorld_pendulum, self).__init__() - - self.limits = np.atleast_2d(limits).astype(config.np_dtype) - num_points = np.broadcast_to(num_points, len(self.limits)) - self.num_points = num_points.astype(np.int16, copy=False) - self.state_dim = len(self.limits) - # print('self.state_dim: ', self.state_dim) - - if np.any(self.num_points < 2): - raise DimensionError('There must be at least 2 points in each ' - 'dimension.') - - # Compute offset and unit hyperrectangle - self.offset = self.limits[:, 0] - self.unit_maxes = ((self.limits[:, 1] - self.offset) - / (self.num_points - 1)).astype(config.np_dtype) - self.offset_limits = np.stack((np.zeros_like(self.limits[:, 0]), - self.limits[:, 1] - self.offset), - axis=1) - - # Statistics about the grid - self.discrete_points = [np.linspace(low, up, n, dtype=config.np_dtype) - for (low, up), n in zip(self.limits, - self.num_points)] - - self.nrectangles = np.prod(self.num_points - 1) - self.nindex = np.prod(self.num_points) - - self.ndim = len(self.limits) - self._all_points = None - - @property - def all_points(self): - """Return all the discrete 
points of the discretization. - - Returns - ------- - points : ndarray - An array with all the discrete points with size - (self.nindex, self.ndim). - - """ - if self._all_points is None: - # my own implementation - mesh = np.stack(np.meshgrid(*self.discrete_points),-1).reshape(-1,self.state_dim) - self._all_points = mesh.astype(config.np_dtype) - if self.all_points.shape[1] == 2: - # swap the first two columns - self._all_points[:,[0,1]] = self._all_points[:,[1,0]] - - # original implementation - # mesh = np.meshgrid(*self.discrete_points, indexing='ij') - # points = np.column_stack(col.ravel() for col in mesh) - # each row of the mesh is a point in the stat space - # self._all_points = points.astype(config.np_dtype) - - return self._all_points - - def __len__(self): - """Return the number of points in the discretization.""" - return self.nindex - - def sample_continuous(self, num_samples): - """Sample uniformly at random from the continuous domain. - - Parameters - ---------- - num_samples : int - - Returns - ------- - points : ndarray - Random points on the continuous rectangle. - - """ - limits = self.limits - rand = np.random.uniform(0, 1, size=(num_samples, self.ndim)) - return rand * np.diff(limits, axis=1).T + self.offset - - def sample_discrete(self, num_samples, replace=False): - """Sample uniformly at random from the discrete domain. - - Parameters - ---------- - num_samples : int - replace : bool, optional - Whether to sample with replacement. - - Returns - ------- - points : ndarray - Random points on the continuous rectangle. - - """ - idx = np.random.choice(self.nindex, size=num_samples, replace=replace) - return self.index_to_state(idx) - - def _check_dimensions(self, states): - """Raise an error if the states have the wrong dimension. 
- - Parameters - ---------- - states : ndarray - - """ - if not states.shape[1] == self.ndim: - raise DimensionError('the input argument has the wrong ' - 'dimensions.') - - def _center_states(self, states, clip=True): - """Center the states to the interval [0, x]. - - Parameters - ---------- - states : np.array - clip : bool, optinal - If False the data is not clipped to lie within the limits. - - Returns - ------- - offset_states : ndarray - - """ - states = np.atleast_2d(states).astype(config.np_dtype) - states = states - self.offset[None, :] - if clip: - np.clip(states, - self.offset_limits[:, 0] + 2 * _EPS, - self.offset_limits[:, 1] - 2 * _EPS, - out=states) - return states - - def index_to_state(self, indices): - """Convert indices to physical states. - - Parameters - ---------- - indices : ndarray (int) - The indices of points on the discretization. - - Returns - ------- - states : ndarray - The states with physical units that correspond to the indices. - - """ - indices = np.atleast_1d(indices) - ijk_index = np.vstack(np.unravel_index(indices, self.num_points)).T - ijk_index = ijk_index.astype(config.np_dtype) - return ijk_index * self.unit_maxes + self.offset - - def state_to_index(self, states): - """Convert physical states to indices. - - Parameters - ---------- - states: ndarray - Physical states on the discretization. - - Returns - ------- - indices: ndarray (int) - The indices that correspond to the physical states. - - """ - states = np.atleast_2d(states) - self._check_dimensions(states) - states = np.clip(states, self.limits[:, 0], self.limits[:, 1]) - states = (states - self.offset) * (1. / self.unit_maxes) - ijk_index = np.rint(states).astype(np.int32) - return np.ravel_multi_index(ijk_index.T, self.num_points) - - def state_to_rectangle(self, states): - """Convert physical states to its closest rectangle index. - - Parameters - ---------- - states : ndarray - Physical states on the discretization. 
- - Returns - ------- - rectangles : ndarray (int) - The indices that correspond to rectangles of the physical states. - - """ - ind = [] - for i, (discrete, num_points) in enumerate(zip(self.discrete_points, - self.num_points)): - idx = np.digitize(states[:, i], discrete) - idx -= 1 - np.clip(idx, 0, num_points - 2, out=idx) - - ind.append(idx) - return np.ravel_multi_index(ind, self.num_points - 1) - - def rectangle_to_state(self, rectangles): - """ - Convert rectangle indices to the states of the bottem-left corners. - - Parameters - ---------- - rectangles : ndarray (int) - The indices of the rectangles - - Returns - ------- - states : ndarray - The states that correspond to the bottom-left corners of the - corresponding rectangles. - - """ - rectangles = np.atleast_1d(rectangles) - ijk_index = np.vstack(np.unravel_index(rectangles, - self.num_points - 1)) - ijk_index = ijk_index.astype(config.np_dtype) - return (ijk_index.T * self.unit_maxes) + self.offset - - def rectangle_corner_index(self, rectangles): - """Return the index of the bottom-left corner of the rectangle. - - Parameters - ---------- - rectangles: ndarray - The indices of the rectangles. - - Returns - ------- - corners : ndarray (int) - The indices of the bottom-left corners of the rectangles. 
- - """ - ijk_index = np.vstack(np.unravel_index(rectangles, - self.num_points - 1)) - return np.ravel_multi_index(np.atleast_2d(ijk_index), - self.num_points) diff --git a/safe_control_gym/lyapunov/utilities.py b/safe_control_gym/lyapunov/utilities.py deleted file mode 100644 index 1090fc9ad..000000000 --- a/safe_control_gym/lyapunov/utilities.py +++ /dev/null @@ -1,722 +0,0 @@ - -# import itertools # for batchify (now in lyapnov.py) - -import numpy as np -from matplotlib.colors import ListedColormap -import scipy.linalg -from scipy import signal -import torch -from parfor import pmap -import multiprocessing as mp -import casadi as cs - -from safe_control_gym.lyapunov.lyapunov import GridWorld -from safe_control_gym.experiments.base_experiment import BaseExperiment -from safe_control_gym.lyapunov.lyapunov import config -from safe_control_gym.math_and_models.symbolic_systems import SymbolicModel - -NP_DTYPE = config.np_dtype -TF_DTYPE = config.dtype - -def gridding(state_dim, state_constraints, num_states = 251, use_zero_threshold = True): - ''' evenly discretize the state space - - Args: - state_dim (int): The dimension of the state space. - state_constraints (np array): The constraints of the state space. - num_state (int): The number of states along each dimension. - use_zero_threshold (bool): Whether to use zero threshold. 
- False: the grid is infinitesimal - ''' - - # State grid - if state_constraints is None: - state_constraints = np.array([[-1., 1.], ] * state_dim) - grid_limits = state_constraints - state_discretization = GridWorld(grid_limits, num_states) - - # Discretization constant - if use_zero_threshold: - tau = 0.0 # assume the grid is infinitesimal - else: - tau = np.sum(state_discretization.unit_maxes) / 2 - - print('Grid size: {}'.format(state_discretization.nindex)) - print('Discretization constant (tau): {}'.format(tau)) - return state_discretization - -def compute_roa(grid, env_func, ctrl ,equilibrium=None, no_traj=True): - """Compute the largest ROA as a set of states in a discretization.""" - if isinstance(grid, np.ndarray): - all_points = grid - nindex = grid.shape[0] - ndim = grid.shape[1] - else: # grid is a GridWorld instance - all_points = grid.all_points - nindex = grid.nindex # number of points in the discretization - ndim = grid.ndim # dimension of the state space - - # Forward-simulate all trajectories from initial points in the discretization - # if no_traj: - # end_states = all_points - # for t in range(1, horizon): - # end_states = closed_loop_dynamics(end_states) - # else: - # trajectories = np.empty((nindex, ndim, horizon)) - # trajectories[:, :, 0] = all_points - # for t in range(1, horizon): - # trajectories[:, :, t] = closed_loop_dynamics(trajectories[:, :, t - 1]) - # end_states = trajectories[:, :, -1] - random_env = env_func(gui=False) - - roa = np.zeros((nindex)) - trajectories = [{} for _ in range(nindex)] - - for state_index in range(nindex): - # for all initial state in the grid - # print('state_index', state_index) - init_state = grid.all_points[state_index] - init_state_dict = {'init_x': init_state[0], 'init_x_dot': init_state[1], \ - 'init_theta': init_state[2], 'init_theta_dot': init_state[3]} - init_state, _ = random_env.reset(init_state = init_state_dict) - # print('init_state', init_state) - static_env = env_func(gui=False, 
random_state=False, init_state=init_state) - static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state) - # Create experiment, train, and run evaluation - experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) - - try: - trajs_data, _ = experiment.run_evaluation(training=True, n_episodes=1, verbose=False) - roa[state_index] = trajs_data['info'][-1][-1]['goal_reached'] - input_traj = trajs_data['action'][0] - state_traj = trajs_data['obs'][0] - trajectories[state_index]['state_traj'] = state_traj - trajectories[state_index]['input_traj'] = input_traj - print('trajectory[state_index]', trajectories[state_index]) - - print('goal reached', trajs_data['info'][-1][-1]['goal_reached']) - # exit() - # close environments - static_env.close() - static_train_env.close() - except RuntimeError: - print('RuntimeError: possibly infeasible initial state') - roa[state_index] = False - # print(ctrl.model.__dir__()) - # print(ctrl.model.nx) - # exit() - trajectories[state_index]['state_traj'] = np.zeros((2, ctrl.model.nx)) - trajectories[state_index]['input_traj'] = np.zeros((1, ctrl.model.nu)) - # close environments - static_env.close() - static_train_env.close() - continue - # trajs_data, _ = experiment.run_evaluation(training=True, n_episodes=1, verbose=False) - # print('obs\n', trajs_data['obs']) - # print('trajs_data\n', trajs_data['info'][-1][-1]) - # print('\n') - # print('trajs_data[\'info\']\n', trajs_data['info'][-1][-1]['goal_reached']) - # input('press enter to continue') - # print('\n') - # exit() - # print('goal reached', trajs_data['info'][-1][1]['goal_reached']) - - - # if equilibrium is None: - # equilibrium = np.zeros((1, ndim)) - random_env.close() - # # Compute an approximate ROA as all states that end up "close" to 0 - # dists = np.linalg.norm(end_states - equilibrium, ord=2, axis=1, keepdims=True).ravel() - # roa = (dists <= tol) - if no_traj: - return roa - else: - return roa, trajectories - - - -def 
compute_roa_fix(grid, env_func, ctrl ,equilibrium=None, no_traj=True): - """Compute the largest ROA as a set of states in a discretization.""" - if isinstance(grid, np.ndarray): - all_points = grid - nindex = grid.shape[0] - ndim = grid.shape[1] - else: # grid is a GridWorld instance - all_points = grid.all_points - nindex = grid.nindex # number of points in the discretization - ndim = grid.ndim # dimension of the state space - - random_env = env_func(gui=False) - - roa = np.zeros((nindex)) - - for state_index in range(nindex): - # for all initial state in the grid - # print('state_index', state_index) - init_state = grid.all_points[state_index] - init_state_dict = {'init_x': 0.0, 'init_x_dot': init_state[0], \ - 'init_theta': init_state[1], 'init_theta_dot': init_state[2]} - init_state, _ = random_env.reset(init_state = init_state_dict) - # print('init_state', init_state) - static_env = env_func(gui=False, random_state=False, init_state=init_state) - static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state) - # Create experiment, train, and run evaluation - experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) - - try: - trajs_data, _ = experiment.run_evaluation(training=True, n_episodes=1, verbose=False) - roa[state_index] = trajs_data['info'][-1][-1]['goal_reached'] - # close environments - static_env.close() - static_train_env.close() - except RuntimeError: - print('RuntimeError: possibly infeasible initial state') - roa[state_index] = False - # close environments - static_env.close() - static_train_env.close() - continue - - # if equilibrium is None: - # equilibrium = np.zeros((1, ndim)) - random_env.close() - # # Compute an approximate ROA as all states that end up "close" to 0 - # dists = np.linalg.norm(end_states - equilibrium, ord=2, axis=1, keepdims=True).ravel() - # roa = (dists <= tol) - if no_traj: - return roa - else: - return roa, trajectories - - -# define the function to be parallelized -def 
simulate_at_index(state_index, grid, env_func, ctrl): - random_env = env_func(gui=False) - init_state = grid.all_points[state_index] - init_state_dict = {'init_x': init_state[0], 'init_x_dot': init_state[1], \ - 'init_theta': init_state[2], 'init_theta_dot': init_state[3]} - init_state, _ = random_env.reset(init_state = init_state_dict) - # print('init_state', init_state) - static_env = env_func(gui=False, random_state=False, init_state=init_state) - static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state) - # Create experiment, train, and run evaluation - experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) - - # # if infeasible initial state, return False - # try: - # trajs_data, _ = experiment.run_evaluation(training=True, n_episodes=1, verbose=False) - # static_env.close() - # static_train_env.close() - # print('goal reached', trajs_data['info'][-1][-1]['goal_reached']) - # # return result - # if trajs_data['info'][-1][-1]['goal_reached']: - # return True - # else: - # return False - # except RuntimeError: - # print('RuntimeError: possibly infeasible initial state') - # # close environments - # static_env.close() - # static_train_env.close() - # return False - # # close the env - trajs_data, _ = experiment.run_evaluation(training=True, n_episodes=1, verbose=False) - static_env.close() - static_train_env.close() - random_env.close() - - return trajs_data['info'][-1][-1]['goal_reached'] - - -def compute_roa_par(grid, env_func, ctrl, equilibrium=None, no_traj=True): - """Compute the largest ROA as a set of states in a discretization.""" - if isinstance(grid, np.ndarray): - all_points = grid - nindex = grid.shape[0] - ndim = grid.shape[1] - else: # grid is a GridWorld instance - all_points = grid.all_points - nindex = grid.nindex # number of points in the discretization - ndim = grid.ndim # dimension of the state space - - # Forward-simulate all trajectories from initial points in the discretization - # 
random_env = env_func(gui=False) - roa = [False] * nindex - - # # init multiprocessing pool - # pool = mp.Pool(mp.cpu_count()) - # # pool apply the 'simulate_at_index' function to all state indices - # roa = [pool.apply(simulate_at_index, \ - # args=(state_idx, grid, random_env, env_func, ctrl)) for state_idx in range(nindex)] - # # close the pool - # pool.close() - # roa = pmap(simulate_at_index, range(nindex), (grid, random_env, env_func, ctrl)) - roa = pmap(simulate_at_index, range(nindex), (grid, env_func, ctrl)) - - # convert list to np array - roa = np.array(roa) - - if no_traj: - return roa - else: - return roa, trajectories - -# define the function to be parallelized -def simulate_at_index_fix(state_index, grid, env_func, ctrl): - random_env = env_func(gui=False) - init_state = grid.all_points[state_index] - init_state_dict = {'init_x': 0.0, 'init_x_dot': init_state[0], \ - 'init_theta': init_state[1], 'init_theta_dot': init_state[2]} - init_state, _ = random_env.reset(init_state = init_state_dict) - # print('init_state', init_state) - static_env = env_func(gui=False, random_state=False, init_state=init_state) - static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state) - # Create experiment, train, and run evaluation - experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) - - trajs_data, _ = experiment.run_evaluation(training=True, n_episodes=1, verbose=False) - static_env.close() - static_train_env.close() - random_env.close() - - return trajs_data['info'][-1][-1]['goal_reached'] - - -def compute_roa_fix_par(grid, env_func, ctrl, equilibrium=None, no_traj=True): - """Compute the largest ROA as a set of states in a discretization.""" - if isinstance(grid, np.ndarray): - all_points = grid - nindex = grid.shape[0] - ndim = grid.shape[1] - else: # grid is a GridWorld instance - all_points = grid.all_points - nindex = grid.nindex # number of points in the discretization - ndim = grid.ndim # dimension of 
the state space - - # Forward-simulate all trajectories from initial points in the discretization - roa = [False] * nindex - roa = pmap(simulate_at_index_fix, range(nindex), (grid, env_func, ctrl)) - # convert list to np array - roa = np.array(roa) - - if no_traj: - return roa - else: - return roa, trajectories - -def binary_cmap(color='red', alpha=1.): - """Construct a binary colormap.""" - if color == 'red': - color_code = (1., 0., 0., alpha) - elif color == 'green': - color_code = (0., 1., 0., alpha) - elif color == 'blue': - color_code = (0., 0., 1., alpha) - else: - color_code = color - transparent_code = (1., 1., 1., 0.) - return ListedColormap([transparent_code, color_code]) - -def balanced_class_weights(y_true, scale_by_total=True): - """Compute class weights from class label counts.""" - y = y_true.astype(np.bool_) - nP = y.sum() - nN = y.size - y.sum() - class_counts = np.array([nN, nP]) - - weights = np.ones_like(y, dtype=float) - weights[ y] /= nP - weights[~y] /= nN - if scale_by_total: - weights *= y.size - - return weights, class_counts - -def dlqr(a, b, q, r): - """Compute the discrete-time LQR controller. - - The optimal control input is `u = -k.dot(x)`. - - Parameters - ---------- - a : np.array - b : np.array - q : np.array - r : np.array - - Returns - ------- - k : np.array - Controller matrix - p : np.array - Cost to go matrix - """ - a, b, q, r = map(np.atleast_2d, (a, b, q, r)) - p = scipy.linalg.solve_discrete_are(a, b, q, r) - - # LQR gain - # k = (b.T * p * b + r)^-1 * (b.T * p * a) - bp = b.T.dot(p) - tmp1 = bp.dot(b) - tmp1 += r - tmp2 = bp.dot(a) - k = np.linalg.solve(tmp1, tmp2) - - return k, p - -def discretize_linear_system(A, B, dt, exact=False): - '''Discretization of a linear system - - dx/dt = A x + B u - --> xd[k+1] = Ad xd[k] + Bd ud[k] where xd[k] = x(k*dt) - - Args: - A (ndarray): System transition matrix. - B (ndarray): Input matrix. - dt (scalar): Step time interval. - exact (bool): If to use exact discretization. 
- - Returns: - Ad (ndarray): The discrete linear state matrix A. - Bd (ndarray): The discrete linear input matrix B. - ''' - - state_dim, input_dim = A.shape[1], B.shape[1] - - if exact: - M = np.zeros((state_dim + input_dim, state_dim + input_dim)) - M[:state_dim, :state_dim] = A - M[:state_dim, state_dim:] = B - - Md = scipy.linalg.expm(M * dt) - Ad = Md[:state_dim, :state_dim] - Bd = Md[:state_dim, state_dim:] - else: - Identity = np.eye(state_dim) - Ad = Identity + A * dt - Bd = B * dt - - return Ad, Bd - -def get_discrete_linear_system_matrices(model, x_0, u_0): - '''Get discrete linear system matrices for a given model. - - Args: - model (ctrl.model) - x_0 (ndarray): The initial state. - u_0 (ndarray): The initial input. - - Returns: - A (ndarray): The discrete linear state matrix A. - B (ndarray): The discrete linear input matrix B. - ''' - - # Linearization. - df = model.df_func(x_0, u_0) - A, B = df[0].toarray(), df[1].toarray() - - # Discretize. - A, B = discretize_linear_system(A, B, model.dt) - - return A, B - -def onestep_dynamics(x, env_func, ctrl): - ''' one-step forward dynamics ''' - # get the format of the initial state - random_env = env_func(gui=False) - init_state_dict = {'init_x': x[0], 'init_x_dot': x[1], \ - 'init_theta': x[2], 'init_theta_dot': x[3]} - init_state, _ = random_env.reset(init_state = init_state_dict) - static_env = env_func(gui=False, random_state=False, init_state=init_state) - static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state) - experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) - trajs_data, _ = experiment.run_evaluation(training=False, n_steps=1, verbose=False) - x = trajs_data['obs'][0][-1] - static_env.close() - static_train_env.close() - random_env.close() - - return x - - - -class InvertedPendulum(object): - """Inverted Pendulum. 
- - Parameters - ---------- - mass : float - length : float - friction : float, optional - dt : float, optional - The sampling time. - normalization : tuple, optional - A tuple (Tx, Tu) of arrays used to normalize the state and actions. It - is so that diag(Tx) *x_norm = x and diag(Tu) * u_norm = u. - - """ - - def __init__(self, mass, length, friction=0, dt=1 / 80, - normalization=None): - """Initialization; see `InvertedPendulum`.""" - super(InvertedPendulum, self).__init__() - self.mass = mass - self.length = length - self.gravity = 9.81 - self.friction = friction - self.dt = dt - self.nx = 2 - self.nu = 1 - self.symbolic = None - - self.normalization = normalization - if normalization is not None: - self.normalization = [np.array(norm, dtype=config.np_dtype) - for norm in normalization] - self.inv_norm = [norm ** -1 for norm in self.normalization] - - def __call__(self, *args, **kwargs): - """Evaluate the function using the template to ensure variable sharing. - - Parameters - ---------- - args : list - The input arguments to the function. - kwargs : dict, optional - The keyword arguments to the function. - - Returns - ------- - outputs : list - The output arguments of the function as given by evaluate. 
- - """ - - outputs = self.forward(*args, **kwargs) - return outputs - - @property - def inertia(self): - """Return inertia of the pendulum.""" - return self.mass * self.length ** 2 - - def normalize(self, state, action): - """Normalize states and actions.""" - if self.normalization is None: - return state, action - - Tx_inv, Tu_inv = map(np.diag, self.inv_norm) - # if isinstance(Tx_inv, np.ndarray): - # Tx_inv = torch.from_numpy(Tx_inv) - # if isinstance(Tu_inv, np.ndarray): - # Tu_inv = torch.from_numpy(Tu_inv) - # state = tf.matmul(state, Tx_inv) - # state = torch.matmul(state, Tx_inv) - state = np.matmul(state, Tx_inv) - - if action is not None: - # action = tf.matmul(action, Tu_inv) - # action = torch.matmul(action, Tu_inv) - action = np.matmul(action, Tu_inv) - - return state, action - - def denormalize(self, state, action): - """De-normalize states and actions.""" - if self.normalization is None: - return state, action - - Tx, Tu = map(np.diag, self.normalization) - - # state = tf.matmul(state, Tx) - # convert to torch - # if isinstance(Tx, np.ndarray): - # Tx = torch.from_numpy(Tx) - # if isinstance(Tu, np.ndarray): - # Tu = torch.from_numpy(Tu) - - # state = torch.matmul(state, Tx) - state = np.matmul(state, Tx) - if action is not None: - # action = tf.matmul(action, Tu) - # action = torch.matmul(action, Tu) - action = np.matmul(action, Tu) - - return state, action - - def linearize(self): - """Return the linearized system. - - Returns - ------- - a : ndarray - The state matrix. - b : ndarray - The action matrix. 
- - """ - gravity = self.gravity - length = self.length - friction = self.friction - inertia = self.inertia - - A = np.array([[0, 1], - [gravity / length, -friction / inertia]], - dtype=config.np_dtype) - - B = np.array([[0], - [1 / inertia]], - dtype=config.np_dtype) - - if self.normalization is not None: - Tx, Tu = map(np.diag, self.normalization) - Tx_inv, Tu_inv = map(np.diag, self.inv_norm) - - A = np.linalg.multi_dot((Tx_inv, A, Tx)) - B = np.linalg.multi_dot((Tx_inv, B, Tu)) - - sys = signal.StateSpace(A, B, np.eye(2), np.zeros((2, 1))) - sysd = sys.to_discrete(self.dt) - return sysd.A, sysd.B - - # @concatenate_inputs(start=1) - def forward(self, state_action): - """Evaluate the dynamics.""" - # Denormalize - # state, action = tf.split(state_action, [2, 1], axis=1) - # state, action = torch.split(state_action, [2, 1], dim=0) - # print('np.split(state_action, [2, 1], axis=0)', np.split(state_action, [2], axis=0)) - state, action = np.split(state_action, [2], axis=0) - state, action = self.denormalize(state, action) - - n_inner = 10 - dt = self.dt / n_inner - for i in range(n_inner): - state_derivative = self.ode(state, action) - state = state + dt * state_derivative - - return self.normalize(state, None)[0] - - def ode(self, state, action): - """Compute the state time-derivative. - - Parameters - ---------- - states: ndarray or Tensor - Unnormalized states. - actions: ndarray or Tensor - Unnormalized actions. 
- - Returns - ------- - x_dot: Tensor - The normalized derivative of the dynamics - - """ - # Physical dynamics - gravity = self.gravity - length = self.length - friction = self.friction - inertia = self.inertia - - # angle, angular_velocity = tf.split(state, 2, axis=1) - # print('state', state) - # print('split result', torch.split(state, 1, dim=0)) - # print('np.split(state, [1], axis=0)', np.split(state, [1], axis=-1)) - # angle, angular_velocity = torch.split(state, 1, dim=-1) - angle, angular_velocity = np.split(state, [1], axis=-1) - - # x_ddot = gravity / length * tf.sin(angle) + action / inertia - # x_ddot = gravity / length * torch.sin(angle) + action / inertia - x_ddot = gravity / length * np.sin(angle) + action / inertia - - if friction > 0: - x_ddot -= friction / inertia * angular_velocity - - # state_derivative = tf.concat((angular_velocity, x_ddot), axis=1) - # state_derivative = torch.cat((angular_velocity, x_ddot), dim=-1) - state_derivative = np.concatenate((angular_velocity, x_ddot), axis=-1) - - # Normalize - return state_derivative - - def _setup_symbolic(self, prior_prop={}, **kwargs): - """Setup the casadi symbolic dynamics.""" - length = self.length - gravity = self.gravity - mass = self.mass - friction = self.friction - inertia = self.inertia # mass * length ** 2 - dt = self.dt - # Input variables. - theta = cs.MX.sym('theta') - theta_dot = cs.MX.sym('theta_dot') - X = cs.vertcat(theta, theta_dot) - U = cs.MX.sym('u') - nx = 2 - nu = 1 - # Dynamics. - theta_ddot = gravity / length * cs.sin(theta) + U / inertia - if friction > 0: - theta_ddot -= friction / inertia * theta_dot - X_dot = cs.vertcat(theta_dot, theta_ddot) - # Observation. - Y = cs.vertcat(theta, theta_dot) - # Define cost (quandratic form). - Q = cs.MX.sym('Q', nx, nx) - R = cs.MX.sym('R', nu, nu) - Xr = cs.MX.sym('Xr', nx, 1) - Ur = cs.MX.sym('Ur', nu, 1) - cost_func = 0.5 * (X - Xr).T @ Q @ (X - Xr) + 0.5 * (U - Ur).T @ R @ (U - Ur) - # Define dynamics and cost dictionaries. 
- dynamics = {'dyn_eqn': X_dot, 'obs_eqn': Y, 'vars': {'X': X, 'U': U}} - cost = {'cost_func': cost_func, 'vars': {'X': X, 'U': U, 'Xr': Xr, 'Ur': Ur, 'Q': Q, 'R': R}} - params = { - # prior inertial properties - 'pole_length': length, - 'pole_mass': mass, - # equilibrium point for linearization - 'X_EQ': np.zeros(self.nx), - 'U_EQ': np.atleast_2d(Ur)[0, :], - } - # Setup symbolic model. - self.symbolic = SymbolicModel(dynamics=dynamics, cost=cost, dt=dt, params=params) - -def compute_roa_pendulum(grid, closed_loop_dynamics, horizon=100, tol=1e-3, equilibrium=None, no_traj=True): - """Compute the largest ROA as a set of states in a discretization.""" - if isinstance(grid, np.ndarray): - all_points = grid - nindex = grid.shape[0] - ndim = grid.shape[1] - else: # grid is a GridWorld instance - all_points = grid.all_points - nindex = grid.nindex - ndim = grid.ndim - - # Forward-simulate all trajectories from initial points in the discretization - if no_traj: - end_states = all_points - for t in range(1, horizon): - end_states = closed_loop_dynamics(end_states) - else: - trajectories = np.empty((nindex, ndim, horizon)) - trajectories[:, :, 0] = all_points - for t in range(1, horizon): - # print('trajectories[:, :, t - 1]', trajectories[1, :, t - 1]) - # print('trajectories[:, :, t - 1].shape', trajectories[1, :, t - 1].shape) - # simulate all states in the grid - for state_index in range(nindex): - trajectories[state_index, :, t] = closed_loop_dynamics(trajectories[state_index, :, t - 1]) - - end_states = trajectories[:, :, -1] - - if equilibrium is None: - equilibrium = np.zeros((1, ndim)) - - # Compute an approximate ROA as all states that end up "close" to 0 - dists = np.linalg.norm(end_states - equilibrium, ord=2, axis=1, keepdims=True).ravel() - roa = (dists <= tol) - if no_traj: - return roa - else: - return roa, trajectories \ No newline at end of file diff --git a/tests/test_hpo/test_train.py b/tests/test_hpo/test_train.py index 8e2bcc28c..ef83011f9 100644 --- 
a/tests/test_hpo/test_train.py +++ b/tests/test_hpo/test_train.py @@ -91,6 +91,7 @@ def test_train_cartpole(SYS, TASK, ALGO, PRIOR, HYPERPARAMETER): # drop database drop(munch.Munch({'tag': f'{ALGO}_hpo'})) + @pytest.mark.parametrize('SYS', ['quadrotor_2D', 'quadrotor_2D_attitude']) @pytest.mark.parametrize('TASK', ['track']) @pytest.mark.parametrize('ALGO', ['ppo', 'sac', 'gp_mpc']) @@ -118,7 +119,7 @@ def test_train_quad(SYS, TASK, ALGO, PRIOR, HYPERPARAMETER): raise ValueError('optimimum hyperparameters are not available for quadrotor') else: raise ValueError('HYPERPARAMETER must be either default or optimimum') - + if ALGO == 'gp_mpc': PRIOR = '150' sys.argv[1:] = ['--algo', ALGO, @@ -142,7 +143,7 @@ def test_train_quad(SYS, TASK, ALGO, PRIOR, HYPERPARAMETER): '--opt_hps', opt_hp_path, '--seed', '6', '--use_gpu', 'True' - ] + ] fac = ConfigFactory() fac.add_argument('--opt_hps', type=str, default='', help='yaml file as a result of HPO.')