diff --git a/benchmarks/on-policy/README.md b/benchmarks/on-policy/README.md
index b94c87bd9..ac68421a6 100644
--- a/benchmarks/on-policy/README.md
+++ b/benchmarks/on-policy/README.md
@@ -1468,14 +1468,14 @@ class="math inline">± 30.93
 SafetyCarGoal1-v0
--0.65 ±
-2.89
-22.90 ±
-16.85
-1.89 ±
-3.52
-4.86 ±
-3.11
+7.12 ±
+5.41
+21.68 ±
+29.11
+16.67 ±
+10.57
+23.58 ±
+26.39
 0.81 ±
 0.41
 17.18 ±
@@ -1508,14 +1508,14 @@ class="smallcaps">SafetyCarButton1-v0
 SafetyCarGoal2-v0
--0.87 ±
-0.79
-6.13 ±
-4.51
--1.03 ±
-1.46
-18.07 ±
-11.62
+0.90 ±
+1.20
+19.98 ±
+10.12
+1.76 ±
+5.20
+31.50 ±
+45.50
 -0.96 ±
 1.10
 3.00 ±
@@ -1548,14 +1548,14 @@ class="smallcaps">SafetyCarButton2-v0
 SafetyPointGoal1-v0
-1.99 ±
-2.87
-7.80 ±
-2.78
-1.02 ±
-0.80
-7.46 ±
-5.26
+7.06 ±
+5.85
+20.04 ±
+21.91
+16.18 ±
+9.55
+29.94 ±
+26.68
 1.69 ±
 3.25
 5.34 ±
@@ -1588,14 +1588,14 @@ class="smallcaps">SafetyPointButton1-v0
 SafetyPointGoal2-v0
--1.85 ±
-0.99
-21.77 ±
-13.56
--1.38 ±
-1.16
-7.87 ±
-2.02
+0.84 ±
+2.93
+14.06 ±
+30.21
+1.64 ±
+4.02
+19.00 ±
+34.69
 -1.13 ±
 0.39
 7.03 ±
@@ -2573,6 +2573,28 @@ class="smallcaps">SafetyPointButton2-v0
[This hunk adds HTML table rows for SafetyCarCircle1-v0 and SafetyCarCircle2-v0; the new cell values were lost in extraction and are omitted here.]
@@ -2617,6 +2639,28 @@ class="smallcaps">SafetyPointButton2-v0
[This hunk adds HTML table rows for SafetyPointCircle1-v0 and SafetyPointCircle2-v0; the new cell values were lost in extraction and are omitted here.]
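The table rows above were regenerated under the per-environment Saute overrides introduced in the two config diffs below. As a minimal sketch of reproducing a single entry, assuming OmniSafe's documented `omnisafe.Agent` entry point (the env id is just one of the re-benchmarked tasks):

```python
import omnisafe

# Train PPOSaute on one of the re-benchmarked tasks. The matching
# env-specific block in PPOSaute.yaml (update_iters, saute_gamma,
# unsafe_reward, ...) is merged over the `defaults:` section by the
# config loader, so no extra arguments are needed here.
agent = omnisafe.Agent('PPOSaute', 'SafetyCarGoal1-v0')
agent.learn()
```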
diff --git a/omnisafe/configs/on-policy/PPOSaute.yaml b/omnisafe/configs/on-policy/PPOSaute.yaml
index 7933463ee..4d34ebe35 100644
--- a/omnisafe/configs/on-policy/PPOSaute.yaml
+++ b/omnisafe/configs/on-policy/PPOSaute.yaml
@@ -126,3 +126,91 @@ defaults:
       activation: tanh
       # learning rate
       lr: 0.0003
+
+SafetyCarCircle1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarCircle2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarGoal1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # normalize observation
+    obs_normalize: False
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarGoal2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # normalize observation
+    obs_normalize: False
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointCircle1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointCircle2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointGoal1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # normalize observation
+    obs_normalize: False
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointGoal2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # number of iterations to update the policy
+    update_iters: 80
+    # normalize observation
+    obs_normalize: False
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
diff --git a/omnisafe/configs/on-policy/TRPOSaute.yaml b/omnisafe/configs/on-policy/TRPOSaute.yaml
index e67992b92..c5e1284f9 100644
--- a/omnisafe/configs/on-policy/TRPOSaute.yaml
+++ b/omnisafe/configs/on-policy/TRPOSaute.yaml
@@ -132,3 +132,67 @@ defaults:
       activation: tanh
       # learning rate
       lr: 0.001
+
+SafetyCarCircle1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarCircle2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarGoal1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyCarGoal2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointCircle1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointCircle2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointGoal1-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
+
+SafetyPointGoal2-v0:
+  # algorithm configurations
+  algo_cfgs:
+    # The saute gamma
+    saute_gamma: 0.9999
+    # The reward when the agent is unsafe
+    unsafe_reward: -0.2
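For context on the two knobs being tuned: in a Saute MDP (Sootla et al., 2022), the remaining safety budget is carried as an extra state variable that is discounted by `saute_gamma` each step, and the environment reward is replaced by `unsafe_reward` once that budget is exhausted. Below is a minimal standalone sketch of that update rule, not OmniSafe's internal wrapper; the function name and `safety_budget` parameter are illustrative:

```python
def saute_step(
    safety_state: float,   # remaining budget, normalized to start at 1.0
    reward: float,         # reward returned by the wrapped environment
    cost: float,           # cost returned by the wrapped environment
    safety_budget: float,  # total episode cost budget
    saute_gamma: float = 0.9999,
    unsafe_reward: float = -0.2,
) -> tuple[float, float]:
    """One bookkeeping step for the Saute safety state.

    The normalized budget shrinks by this step's cost and is rescaled
    by saute_gamma; once it reaches zero, the agent is considered
    unsafe and every subsequent reward becomes unsafe_reward.
    """
    safety_state = (safety_state - cost / safety_budget) / saute_gamma
    shaped_reward = reward if safety_state > 0.0 else unsafe_reward
    return safety_state, shaped_reward
```

With `saute_gamma` this close to 1.0, the budget is essentially undiscounted within an episode, so these per-environment settings mainly control how harshly post-violation steps are penalized via `unsafe_reward`.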