From 02301e93b605a539c1405b2a6813d1d7bf8530f6 Mon Sep 17 00:00:00 2001
From: Gaiejj <524339208@qq.com>
Date: Mon, 4 Sep 2023 21:05:59 +0800
Subject: [PATCH] feat: update saute config and benchmarking results

---
 benchmarks/on-policy/README.md            | 108 +++++++++++++++-------
 omnisafe/configs/on-policy/PPOSaute.yaml  |  88 ++++++++++++++++++
 omnisafe/configs/on-policy/TRPOSaute.yaml |  64 +++++++++++++
 3 files changed, 228 insertions(+), 32 deletions(-)

diff --git a/benchmarks/on-policy/README.md b/benchmarks/on-policy/README.md
index b94c87bd9..ac68421a6 100644
--- a/benchmarks/on-policy/README.md
+++ b/benchmarks/on-policy/README.md
@@ -1468,14 +1468,14 @@ class="math inline">± 30.93
+
+ +
+ SafetyCarCircle1-v0
+
+ |
+
+
+ +
+ SafetyCarCircle2-v0
+
+ |
+
@@ -2617,6 +2639,28 @@ class="smallcaps">SafetyPointButton2-v0 |
+
+ +
+ SafetyPointCircle1-v0
+
+ |
+
+
+ +
+ SafetyPointCircle2-v0
+
+ |
+
diff --git a/omnisafe/configs/on-policy/PPOSaute.yaml b/omnisafe/configs/on-policy/PPOSaute.yaml
index 7933463ee..4d34ebe35 100644
--- a/omnisafe/configs/on-policy/PPOSaute.yaml
+++ b/omnisafe/configs/on-policy/PPOSaute.yaml
@@ -126,3 +126,91 @@ defaults:
       activation: tanh
       # learning rate
       lr: 0.0003
+
+SafetyCarCircle1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarCircle2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarGoal1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # normalize observation
+    obs_normalize: False
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarGoal2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # normalize observation
+    obs_normalize: False
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointCircle1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointCircle2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointGoal1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # normalize observation
+    obs_normalize: False
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointGoal2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # normalize observation
+    obs_normalize: False
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
diff --git a/omnisafe/configs/on-policy/TRPOSaute.yaml b/omnisafe/configs/on-policy/TRPOSaute.yaml
index e67992b92..c5e1284f9 100644
--- a/omnisafe/configs/on-policy/TRPOSaute.yaml
+++ b/omnisafe/configs/on-policy/TRPOSaute.yaml
@@ -132,3 +132,67 @@ defaults:
       activation: tanh
       # learning rate
       lr: 0.001
+
+SafetyCarCircle1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarCircle2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarGoal1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarGoal2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointCircle1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointCircle2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointGoal1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointGoal2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2