Skip to content

Commit

Permalink
Changing reward structure and lowering sf_penalty
Browse files Browse the repository at this point in the history
  • Loading branch information
pizarrob committed Nov 8, 2023
1 parent c050e7f commit f9d67fc
Show file tree
Hide file tree
Showing 583 changed files with 21,058 additions and 64,266 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ algo_config:
# safety filter
filter_train_actions: True
penalize_sf_diff: True
sf_penalty: 300
sf_penalty: 150
use_safe_reset: True
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,10 @@ task_config:
obs_goal_horizon: 1

# RL Reward
rew_state_weight: [1, 0.01, 1, 0.01, 1, 0.01, 0.5, 0.5, 0.5, 0.01, 0.01, 0.01]
rew_act_weight: 0.0001
rew_state_weight: [1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0]
rew_act_weight: 0
rew_exponential: True
info_mse_metric_state_weight: [1, 0.01, 1, 0.01, 1, 0.01, 0.5, 0.5, 0.5, 0.01, 0.01, 0.01]
info_mse_metric_state_weight: [1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0]

constraints:
- constraint_form: default_constraint
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,17 +152,17 @@ task_config:
info_in_reset: true
info_mse_metric_state_weight:
- 1
- 0.01
- 0
- 1
- 0.01
- 0
- 1
- 0.01
- 0.5
- 0.5
- 0.5
- 0.01
- 0.01
- 0.01
- 0
- 0
- 0
- 0
- 0
- 0
- 0
init_state: null
init_state_randomization_info:
init_p:
Expand Down Expand Up @@ -221,21 +221,21 @@ task_config:
quad_type: 3
randomized_inertial_prop: false
randomized_init: true
rew_act_weight: 0.0001
rew_act_weight: 0
rew_exponential: true
rew_state_weight:
- 1
- 0.01
- 0
- 1
- 0.01
- 0
- 1
- 0.01
- 0.5
- 0.5
- 0.5
- 0.01
- 0.01
- 0.01
- 0
- 0
- 0
- 0
- 0
- 0
- 0
seed: 1337
task: traj_tracking
task_info:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,101 +1,101 @@
step,loss/approx_kl
10000,0.017213520842293897
20000,0.02764010163179288
30000,0.01743585287282864
40000,0.017944027467941246
50000,0.018553049815818668
60000,0.023324231663718824
70000,0.024198097828775644
80000,0.013849695399403571
90000,0.020018802893658476
100000,0.020879797528808315
110000,0.020450342632830146
120000,0.034239981804663935
130000,0.026658428382749354
140000,0.022649019258096814
150000,0.022941715223714708
160000,0.007890155275041858
170000,0.014145796125133833
180000,0.021201497875154015
190000,0.01608441020362079
200000,0.020309033151715995
210000,0.03166960822418333
220000,0.02117745455664893
230000,0.026413527317345142
240000,0.02563850761701663
250000,0.030556447710841893
260000,0.02503080541888873
270000,0.015589506893108288
280000,0.022843319425980255
290000,0.02117375050050517
300000,0.0253074764739722
310000,0.015984897377590335
320000,0.028337029942000908
330000,0.01633354507697125
340000,0.03463563905097544
350000,0.024914210988208653
360000,0.019485377830763657
370000,0.020445223230247694
380000,0.014343827916309237
390000,0.0241228209498028
400000,0.02197242130835851
410000,0.024426490704839425
420000,0.012739477089295787
430000,0.025227935565635563
440000,0.02386964901039998
450000,0.029704142517099776
460000,0.02316980489219228
470000,0.018668918684124947
480000,0.01797391955430309
490000,0.017083619410792988
500000,0.021401571932559208
510000,0.016890498654296003
520000,0.019246815827985604
530000,0.025200538026789825
540000,0.009884566161781547
550000,0.018769014471520983
560000,0.0193116427709659
570000,0.03082204340025783
580000,0.03612479014943043
590000,0.024618654077251754
600000,0.026440184066692983
610000,0.013226955570280552
620000,0.016506727163990337
630000,0.029131261756022764
640000,0.03199023871372143
650000,0.03162956436475117
660000,0.025987505105634533
670000,0.03066992412010829
680000,0.03358819276715318
690000,0.018738419531534115
700000,0.02501208819448948
710000,0.033234185290833315
720000,0.02395644110317032
730000,0.021680068410933014
740000,0.02534624853481849
750000,0.022559438801060122
760000,0.02713867419709761
770000,0.01857999600470066
780000,0.026966472839315726
790000,0.01576397685954968
800000,0.015534047378847995
810000,0.02498807981610298
820000,0.02157363562534253
830000,0.02366078707079093
840000,0.033624009167154634
850000,0.02301981703688701
860000,0.023170643423994382
870000,0.03527537065868576
880000,0.024989187065511943
890000,0.02142417756840587
900000,0.023315265898903214
910000,0.026628257675717276
920000,0.01719271431987484
930000,0.025014398898929358
940000,0.03365887276207407
950000,0.019868735255052648
960000,0.025951597404976684
970000,0.02967567397281527
980000,0.04032600869735082
990000,0.008862463602175314
1000000,0.02233600672334433
10000,0.020137721152665718
20000,0.03323118432114522
30000,0.028223223782454933
40000,0.016521792913166182
50000,0.02500570526657005
60000,0.01670982791110873
70000,0.012397157074883581
80000,0.018730658230682214
90000,0.014966569763297835
100000,0.010290617014591892
110000,0.0066307478429128725
120000,0.0165629174405088
130000,0.018210103161012132
140000,0.023021942066649596
150000,0.033144783616686865
160000,0.02608788656070828
170000,0.023953330811734
180000,0.02836121867100398
190000,0.016108898771926762
200000,0.01991656795144081
210000,0.025934501442437374
220000,0.020120707138751946
230000,0.01647923868149519
240000,0.01757938109027843
250000,0.023300569178536533
260000,0.032563160778954624
270000,0.008294070248181622
280000,0.030361381731927393
290000,0.024039351101964708
300000,0.014708312259366113
310000,0.014669864779959121
320000,0.01559359724633396
330000,0.013568438744793335
340000,0.02837056394976874
350000,0.021204595174640416
360000,0.005247377930209041
370000,0.018637544211621088
380000,0.016281461187948786
390000,0.024795917343969145
400000,0.00871970526253184
410000,0.02092033252120018
420000,0.014713056292384863
430000,0.026567908593763907
440000,0.02468441917250554
450000,0.02149640591815114
460000,0.02302646276851495
470000,0.02240512122710546
480000,0.014705418702214954
490000,0.017682619951665403
500000,0.0176922667461137
510000,0.014760783904542527
520000,0.019856103758017222
530000,0.01884897028406461
540000,0.008913337718695399
550000,0.027017528656870134
560000,0.027360919800897443
570000,0.020266632580508794
580000,0.019112755854924522
590000,0.005723213156064351
600000,0.016026155029733976
610000,0.028475298546254628
620000,0.03135352386161685
630000,0.022266716603189705
640000,0.020362504199147223
650000,0.014302307491501173
660000,0.019519669034828747
670000,0.011313829850405454
680000,0.013845785614103077
690000,0.018402333340297145
700000,0.021273059522112208
710000,0.02386944287767013
720000,0.019268234043071666
730000,0.026379411884893976
740000,0.020162486967941127
750000,0.022761240725715958
760000,0.012200730262945098
770000,0.02084133398408691
780000,0.017823695112019776
790000,0.02178739663213491
800000,0.027331723521153133
810000,0.026322274313618742
820000,0.022505783693244054
830000,0.01287282401074966
840000,0.02628030975659688
850000,0.023432799801230433
860000,0.016341675134996576
870000,0.015299695978562034
880000,0.01862583889936407
890000,0.008687917981296778
900000,0.019909434858709572
910000,0.015935555566102266
920000,0.023189303775628407
930000,0.021775948721915482
940000,0.022657501014570394
950000,0.024541364579151077
960000,0.009847055189311504
970000,0.016492907516658308
980000,0.022127569497873388
990000,0.00425912362212936
1000000,0.020471757867683966
Loading

0 comments on commit f9d67fc

Please sign in to comment.