
Commit

Merge pull request NVlabs#3 from krishpop/stabalise
Stabalise, cleanup and simplify repo
krishpop authored Aug 6, 2023
2 parents c276c46 + b288142 commit 38991bf
Showing 38 changed files with 4,723 additions and 2,725 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -4,6 +4,8 @@
*.swp
*.swo
tags
**.out
**.log

dflex/dflex/kernels*/
**logs/
@@ -12,3 +14,7 @@ dflex/dflex/kernels*/
**.egg-info/

wandb/
checkpoints/
multirun/

scripts/sweeps/
595 changes: 333 additions & 262 deletions dflex/dflex/model.py

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion dflex/dflex/sim.py
@@ -1190,6 +1190,7 @@ def eval_rigid_contacts_art(
contact_mat: df.tensor(int),
materials: df.tensor(float),
body_f_s: df.tensor(df.spatial_vector),
contact_changed: df.tensor(float),
):
tid = df.tid()

@@ -1251,6 +1252,7 @@ def eval_rigid_contacts_art(
t_total = df.cross(p, f_total)

df.atomic_add(body_f_s, c_body, df.spatial_vector(t_total, f_total))
df.atomic_add(contact_changed, c_body, 1.0)


@df.func
@@ -2683,7 +2685,7 @@ def _simulate(self, tape, model, state_in, state_out, dt, update_mass_matrix=Tru
model.contact_material,
model.shape_materials,
],
outputs=[state_out.body_f_s],
outputs=[state_out.body_f_s, state_out.contact_changed],
adapter=model.adapter,
preserve_output=True,
)
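Note on this sim.py change: the new contact_changed output is incremented by 1.0 for every contact the kernel applies to a body, so after a step it holds a per-body count of active contacts. This appears to support the contact-aware truncation referenced by the configs below (contact_theshold, contact_truncation). A minimal sketch of how such a counter could be consumed on the PyTorch side; the helper name and the threshold semantics are assumptions, not code from this commit:

import torch

def contacts_changed(prev_counts: torch.Tensor,
                     curr_counts: torch.Tensor,
                     threshold: float) -> torch.Tensor:
    # Hypothetical helper: flag bodies whose contact count moved by more
    # than `threshold` between two simulation steps.
    return (curr_counts - prev_counts).abs() > threshold

prev = torch.zeros(8)                                    # counts after step t-1
curr = torch.tensor([0., 3., 0., 0., 160., 0., 0., 1.])  # counts after step t
# 150.0 borrows the old shac.yaml contact_theshold value, purely for illustration.
truncate_horizon = bool(contacts_changed(prev, curr, threshold=150.0).any())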
31 changes: 31 additions & 0 deletions scripts/cfg/alg/ahac.yaml
@@ -0,0 +1,31 @@
_target_: shac.algorithms.ahac.AHAC
_recursive_: False
actor_config:
  _target_: shac.models.actor.ActorStochasticMLP
  units: ${resolve_child:[64,64],${env.shac.actor_mlp},units}
  activation: elu
critic_config:
  _target_: shac.models.critic.CriticMLP
  units: ${resolve_child:[64,64],${env.shac.critic_mlp},units}
  activation: elu
actor_lr: ${resolve_child:2e-3,${env.shac},actor_lr}
critic_lr: ${resolve_child:2e-3,${env.shac},critic_lr}
lr_schedule: linear
target_critic_alpha: ${resolve_child:0.2,${env.shac},target_critic_alpha}
obs_rms: True
ret_rms: False
critic_iterations: 16
critic_batches: 4
critic_method: td-lambda # ('td-lambda', 'one-step')
lam: 0.95
gamma: 0.99
max_epochs: ${resolve_child:2000,${env.shac},max_epochs}
steps_min: 4
steps_max: 64
contact_theshold: 1e9
grad_norm: 1.0
save_interval: ${resolve_child:400,${env.shac},save_interval}
stochastic_eval: False
eval_runs: 12
train: ${general.train}
device: ${general.device}
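The ${resolve_child:<default>,<node>,<key>} interpolations used throughout these configs are not an OmegaConf built-in; the repository presumably registers a custom resolver that returns node.key when the env config defines it and falls back to the default otherwise. A sketch of a resolver with those assumed semantics (the actual implementation in the repo may differ):

from omegaconf import OmegaConf

def resolve_child(default, node, key):
    # Fall back to `default` when the referenced node is missing,
    # or does not define `key`.
    if node is None or key not in node:
        return default
    return node[key]

OmegaConf.register_new_resolver("resolve_child", resolve_child)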
135 changes: 69 additions & 66 deletions scripts/cfg/alg/ppo.yaml
@@ -1,72 +1,75 @@
algo:
name: a2c_continuous
name: ppo

model:
name: continuous_a2c_logstd
params:
algo:
name: a2c_continuous

network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
model:
name: continuous_a2c_logstd

mu_init:
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None

mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: ${resolve_child:[64, 64],${env.ppo.actor_mlp},units}
activation: elu
d2rl: False

initializer:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: ${resolve_default:[64, 64],${..env.actor_mlp.units}}
activation: elu
d2rl: False

initializer:
name: default
regularizer:
name: None
regularizer:
name: None

load_checkpoint: False
load_path: nn/${env.name}_ppo.pth

load_checkpoint: False
load_path: nn/${..env.name}_ppo.pth
config:
name: ${env.name}_ppo
env_name: ${env.name}
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
reward_shaper:
scale_value: 0.01
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: ${resolve_child:3e-4,${env.ppo},lr}
lr_schedule: adaptive
lr_threshold: 0.008
kl_threshold: 0.008
score_to_win: 20000
max_epochs: ${resolve_child:5000,${env.ppo},max_epochs}
save_best_after: ${resolve_child:100,${env.ppo},save_best_after}
save_frequency: ${resolve_child:400,${env.ppo},save_interval}
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
num_actors: ${resolve_child:2048,${env.ppo},num_actors}
steps_num: ${resolve_child:32,${env.ppo},steps_num}
minibatch_size: ${resolve_child:16384,${env.ppo},minibatch_size}
mini_epochs: 5
critic_coef: 4
clip_value: True
seq_len: 4
bounds_loss_coef: 0.0001

config:
name: ${..env.name}_ppo
env_name: ${..env.name}
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
reward_shaper:
scale_value: 0.01
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: ${resolve_default:3e-4${...env.ppo.lr}}
lr_schedule: adaptive
lr_threshold: 0.008
kl_threshold: 0.008
score_to_win: 20000
max_epochs: ${resolve_default:5000,${...env.ppo.max_epochs}}
save_best_after: ${resolve_Default:100${...env.ppo.save_best_after}}
save_frequency: ${resolve_default:400,${...env.ppo.save_interval}}
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
num_actors: ${resolve_default:2048,${..env.ppo.num_actors}}
steps_num: ${resolve_default:32,${...env.ppo.max_epochs}}
minibatch_size: ${resolve_default:16384,${...env.ppo.minibatch_size}
mini_epochs: 5
critic_coef: 4
clip_value: True
seq_len: 4
bounds_loss_coef: 0.0001

player:
games_num: ${resolve_default:24,${....env.player.games_num}}
num_actors: ${resolve_default:3,${....env.player.num_actors}}
determenistic: True
print_stats: True
player:
games_num: ${resolve_child:24,${env.player},games_num}
num_actors: ${resolve_child:3,${env.player},num_actors}
determenistic: True
print_stats: True
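Unlike the SHAC/AHAC configs, ppo.yaml keeps rl_games' own params: layout, so it is handed to the rl_games runner rather than instantiated as a Hydra _target_. A minimal sketch of that hand-off, assuming a Hydra entry point named config with an alg group (the repository's launch script may wire this differently):

from hydra import compose, initialize
from omegaconf import OmegaConf
from rl_games.torch_runner import Runner

# Compose the full config tree so the ${env...} interpolations resolve.
with initialize(config_path="cfg", version_base=None):
    cfg = compose(config_name="config", overrides=["alg=ppo"])

runner = Runner()
runner.load(OmegaConf.to_container(cfg.alg, resolve=True))  # rl_games expects the params: layout
runner.run({"train": True})  # or {"play": True, "checkpoint": "..."} to evaluate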
72 changes: 29 additions & 43 deletions scripts/cfg/alg/shac.yaml
@@ -1,43 +1,29 @@
name: shac
params:
network:
actor: ActorStochasticMLP # ActorDeterministicMLP
actor_mlp:
units: ${env.shac.actor_mlp.units}
activation: elu

critic: CriticMLP
critic_mlp:
units: ${env.shac.critic_mlp.units}
activation: elu

config:
name: ${env.name}_shac
actor_learning_rate: 2e-3 # ${resolve_default:2e-3,${..env.actor_lr}} # adam
critic_learning_rate: 2e-3 # ${resolve_default:2e-3,${..env.critic_lr}} # adam
lr_schedule: linear # ('constant', 'linear')
target_critic_alpha: 0.2 # ${resolve_default:0.2,${..env.target_critic_alpha}}
obs_rms: True
ret_rms: False
critic_iterations: 16
critic_method: td-lambda # ('td-lambda', 'one-step')
lambda: 0.95
num_batch: 4
gamma: 0.99
betas:
- 0.7
- 0.95 # adam
max_epochs: ${env.shac.max_epochs}
steps_min: 8
steps_num: 32
grad_norm: 1.0
truncate_grads: True
num_actors: ${env.config.num_envs} # ${resolve_default:64,${..env.config.num_envs}}
save_interval: 400 # ${resolve_default:400,${..env.save_interval}}
contact_theshold: 150

player:
determenistic: True
games_num: ${env.player.games_num}
num_actors: ${env.player.num_actors}
print_stats: True
_target_: shac.algorithms.shac.SHAC
_recursive_: False
actor_config:
  _target_: shac.models.actor.ActorStochasticMLP
  units: ${resolve_child:[64,64],${env.shac.actor_mlp},units}
  activation: elu
critic_config:
  _target_: shac.models.critic.CriticMLP
  units: ${resolve_child:[64,64],${env.shac.critic_mlp},units}
  activation: elu
actor_lr: ${resolve_child:2e-3,${env.shac},actor_lr}
critic_lr: ${resolve_child:2e-3,${env.shac},critic_lr}
lr_schedule: linear
target_critic_alpha: ${resolve_child:0.2,${env.shac},target_critic_alpha}
obs_rms: True
ret_rms: False
critic_iterations: 16
critic_batches: 4
critic_method: td-lambda # ('td-lambda', 'one-step')
lam: 0.95
gamma: 0.99
max_epochs: ${resolve_child:2000,${env.shac},max_epochs}
steps_num: 32
grad_norm: 1.0
save_interval: ${resolve_child:400,${env.shac},save_interval}
stochastic_eval: False
eval_runs: 12
train: ${general.train}
device: ${general.device}
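With the rewrite, shac.yaml (like ahac.yaml above) is a Hydra _target_ config, and _recursive_: False leaves actor_config and critic_config as raw configs for the algorithm to build itself. A sketch of how a training entry point would typically instantiate it; the extra keyword argument and the train() call are assumptions about the repo's script, not code from this commit:

import hydra
from hydra.utils import instantiate
from omegaconf import DictConfig

@hydra.main(config_path="cfg", config_name="config", version_base=None)
def main(cfg: DictConfig):
    # _recursive_: False means Hydra only builds SHAC itself; SHAC receives
    # actor_config / critic_config as plain configs and instantiates them.
    algo = instantiate(cfg.alg, env_config=cfg.env)  # extra kwarg assumed
    algo.train()  # training entry point assumed

if __name__ == "__main__":
    main()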
97 changes: 31 additions & 66 deletions scripts/cfg/alg/shac2.yaml
@@ -1,66 +1,31 @@
name: shac2
params:
network:
actor:
_target_: shac.models.actor.ActorStochasticMLP # ActorDeterministicMLP
device: ${general.device}
cfg_network:
actor_mlp:
units: ${env.shac2.actor_mlp.units}
activation: elu

critic:
_target_: shac.models.critic.QCriticMLP
cfg_network:
critic_mlp:
units: ${env.shac2.critic_mlp.units}
activation: elu

config:
name: ${env.name}_${...name}
actor_optimizer: ${..default_actor_opt}
critic_optimizer: ${..default_critic_opt}
lr_schedule: linear # ['constant', 'linear', 'adaptive']
target_critic_alpha: ${resolve_default:0.4,${env.shac2.target_critic_alpha}}
obs_rms: True
ret_rms: False
critic_iterations: 16
critic_method: td-lambda # ['td-lambda', 'one-step']
lam: ${env.shac2.lambda}
num_batch: 4
gamma: 0.99
max_epochs: ${resolve_default:2000,${env.shac2.max_epochs}}
steps_num: ${resolve_default:32,${env.shac2.steps_num}}
grad_norm: 1.0
truncate_grads: True
save_interval: ${resolve_default:400,${env.shac2.save_interval}}
early_stopping_patience: ${env.shac2.max_epochs}
rew_scale: 1.0
score_keys: []

player:
determenistic: True
games_num: ${resolve_default:1,${env.games_num}}
num_actors: ${resolve_default:1,${env.player.num_actors}}
print_stats: True

default_actor_opt:
_target_: torch.optim.Adam
lr: ${env.shac2.actor_lr} # adam
betas: ${env.shac2.betas} # adam

default_critic_opt:
_target_: torch.optim.Adam
lr: ${env.shac2.critic_lr} # adam
betas: ${env.shac2.betas} # adam

default_adaptive_scheduler:
_target_: rl_games.common.schedulers.AdaptiveScheduler
kl_threshold : 0.01

default_linear_scheduler:
_target_: rl_games.common.schedulers.LinearScheduler
start_lr: ${..default_actor_opt.lr}
min_lr: 1e-5
max_steps: ${..config.max_epochs}
apply_to_entropy: False
_target_: shac.algorithms.shac2.SHAC
_recursive_: False
actor_config:
  _target_: shac.models.actor.ActorStochasticMLP
  units: ${resolve_child:[64,64],${env.shac.actor_mlp},units}
  activation: elu
critic_config:
  _target_: shac.models.critic.CriticMLP
  units: ${resolve_child:[64,64],${env.shac.critic_mlp},units}
  activation: elu
actor_lr: ${resolve_child:2e-3,${env.shac},actor_lr}
critic_lr: ${resolve_child:2e-3,${env.shac},critic_lr}
lr_schedule: linear
target_critic_alpha: ${resolve_child:0.2,${env.shac},target_critic_alpha}
obs_rms: True
ret_rms: False
critic_iterations: 16
critic_batches: 4
critic_method: td-lambda # ('td-lambda', 'one-step')
lam: 0.95
gamma: 0.99
max_epochs: ${resolve_child:2000,${env.shac},max_epochs}
steps_num: 32
steps_min: 4
contact_truncation: True
grad_norm: 1.0
save_interval: ${resolve_child:400,${env.shac},save_interval}
stochastic_eval: False
eval_runs: 12
train: ${general.train}
device: ${general.device}