
Commit

Merge pull request #54 from Toni-SM/develop
Develop
Toni-SM committed Jan 26, 2023
2 parents 930b8d7 + ea7bf7a commit b760dc7
Showing 6 changed files with 20 additions and 15 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,10 @@

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

+## [0.10.1] - 2023-01-26
+### Fixed
+- Tensorboard writer instantiation when `write_interval` is zero

## [0.10.0] - 2023-01-22
### Added
- Isaac Orbit environment loader
2 changes: 1 addition & 1 deletion docs/source/_static/imgs/wrapping.svg
12 changes: 6 additions & 6 deletions docs/source/conf.py
@@ -44,9 +44,7 @@
}

intersphinx_disabled_domains = ['std']

templates_path = ['_templates']

rst_prolog = """
.. include:: <s5defs.txt>
@@ -55,15 +53,17 @@
# -- Options for HTML output

html_theme = 'sphinx_rtd_theme'

html_logo = '_static/data/skrl-up.png'

html_favicon = "_static/data/favicon.ico"

html_static_path = ['_static']

html_css_files = ['css/s5defs-roles.css',
                  'css/skrl.css']

# -- Options for EPUB output

epub_show_urls = 'footnote'

# -- Options for copybutton ext

copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True
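
As an aside (illustration only; not part of this commit's changes): the copybutton_prompt_text regex above is what lets sphinx-copybutton strip ">>> " and "... " prompts from doctest-style snippets before they are copied. A rough sketch of the matching behavior using plain re (a hypothetical stand-alone example; the real stripping happens on the rendered HTML page):

import re

prompt = re.compile(r'>>> |\.\.\. ')  # same pattern as copybutton_prompt_text above

for line in [">>> total = 1 + 1", "... total *= 2", "print(total)"]:
    print(prompt.sub("", line, count=1))
# prints:
# total = 1 + 1
# total *= 2
# print(total)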
12 changes: 6 additions & 6 deletions docs/source/examples/isaacorbit/ppo_reach_franka.py
@@ -80,7 +80,7 @@ def compute(self, inputs, role):
cfg_ppo["lambda"] = 0.95
cfg_ppo["learning_rate"] = 3e-4
cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL
cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008}
cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.01}
cfg_ppo["random_timesteps"] = 0
cfg_ppo["learning_starts"] = 0
cfg_ppo["grad_norm_clip"] = 1.0
@@ -90,14 +90,14 @@ def compute(self, inputs, role):
cfg_ppo["entropy_loss_scale"] = 0.0
cfg_ppo["value_loss_scale"] = 2.0
cfg_ppo["kl_threshold"] = 0
cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01
cfg_ppo["rewards_shaper"] = None
cfg_ppo["state_preprocessor"] = RunningStandardScaler
cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device}
cfg_ppo["value_preprocessor"] = RunningStandardScaler
cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device}
-# logging to TensorBoard and write checkpoints each 40 and 400 timesteps respectively
-cfg_ppo["experiment"]["write_interval"] = 40
-cfg_ppo["experiment"]["checkpoint_interval"] = 400
+# logging to TensorBoard and write checkpoints each 80 and 800 timesteps respectively
+cfg_ppo["experiment"]["write_interval"] = 80
+cfg_ppo["experiment"]["checkpoint_interval"] = 800

agent = PPO(models=models_ppo,
            memory=memory,
@@ -108,7 +108,7 @@ def compute(self, inputs, role):


# Configure and instantiate the RL trainer
cfg_trainer = {"timesteps": 8000, "headless": True}
cfg_trainer = {"timesteps": 16000, "headless": True}
trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent)

# start training
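
For context (an observation from the values in this diff, not a statement in the commit message): the run length doubles from 8000 to 16000 timesteps, and the write and checkpoint intervals double with it, so the logging and checkpointing cadence is unchanged. A quick check, using hypothetical variable names and the values shown above:

old = {"timesteps": 8000,  "write_interval": 40, "checkpoint_interval": 400}
new = {"timesteps": 16000, "write_interval": 80, "checkpoint_interval": 800}

for cfg in (old, new):
    print(cfg["timesteps"] // cfg["write_interval"],       # 200 TensorBoard writes
          cfg["timesteps"] // cfg["checkpoint_interval"])   # 20 checkpoints
# both iterations print: 200 20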
3 changes: 2 additions & 1 deletion skrl/agents/torch/base.py
@@ -153,7 +153,8 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
wandb.init(**wandb_kwargs)

# main entry to log data for consumption and visualization by TensorBoard
-self.writer = SummaryWriter(log_dir=self.experiment_dir)
+if self.write_interval > 0:
+    self.writer = SummaryWriter(log_dir=self.experiment_dir)

if self.checkpoint_interval > 0:
    os.makedirs(os.path.join(self.experiment_dir, "checkpoints"), exist_ok=True)
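
A minimal usage sketch of the fix (an assumption based on standard skrl 0.10.x configuration keys and the PPO example elsewhere in this commit; not part of the diff itself): with the guard above, setting the experiment's write_interval to 0 means no SummaryWriter is created and no TensorBoard event files are written, while checkpointing still works independently.

import copy

from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG

cfg_ppo = copy.deepcopy(PPO_DEFAULT_CONFIG)
cfg_ppo["experiment"]["write_interval"] = 0         # disable TensorBoard logging entirely
cfg_ppo["experiment"]["checkpoint_interval"] = 800  # checkpoints are still saved

# agent construction as in the example script (models, memory, env not defined here):
# agent = PPO(models=models_ppo, memory=memory, cfg=cfg_ppo,
#             observation_space=env.observation_space,
#             action_space=env.action_space, device=device)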
2 changes: 1 addition & 1 deletion skrl/version.txt
@@ -1 +1 @@
-0.10.0
+0.10.1
