From bca56ae6cef79fa69945c26951a269e4a685520a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 26 Jan 2023 09:59:54 +0100 Subject: [PATCH 1/6] Instantiate Tensorboard writer if write interval is greater than zero --- skrl/agents/torch/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 7d0c4c00..c7ea2b46 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -153,7 +153,8 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: wandb.init(**wandb_kwargs) # main entry to log data for consumption and visualization by TensorBoard - self.writer = SummaryWriter(log_dir=self.experiment_dir) + if self.write_interval > 0: + self.writer = SummaryWriter(log_dir=self.experiment_dir) if self.checkpoint_interval > 0: os.makedirs(os.path.join(self.experiment_dir, "checkpoints"), exist_ok=True) From 16886c1abd906e521b51ae97e1b5a52d75fa1e31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 26 Jan 2023 10:02:07 +0100 Subject: [PATCH 2/6] Update wrapping image file in docs --- docs/source/_static/imgs/wrapping.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/_static/imgs/wrapping.svg b/docs/source/_static/imgs/wrapping.svg index cd8605c0..513cada4 100755 --- a/docs/source/_static/imgs/wrapping.svg +++ b/docs/source/_static/imgs/wrapping.svg @@ -1 +1 @@ -DeepMindOmniverseIsaac GymGymnum_envs: intdevice: ML framework specific devicestate_space: gym/gymnasiumspaceobservation_space: gym/gymnasiumspaceaction_space: gym/gymnasiumspacereset()-> states, infosstep(actions)-> states, rewards, terminated, truncated, infosrender()close()propertiesmethodswrap_env()Isaac GymGymnasium +DeepMindOmniverseIsaac GymGymnum_envs: intdevice: ML framework specific devicestate_space: gym/gymnasiumspaceobservation_space: gym/gymnasiumspaceaction_space: gym/gymnasiumspacereset()-> states, infosstep(actions)-> states, rewards, terminated, truncated, infosrender()close()propertiesmethodsIsaac OrbitGymnasiumIsaac Gymwrap_env()OthersInterfaces From 289fd7c10121c689e75fd2820917001c317e92d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 26 Jan 2023 10:02:37 +0100 Subject: [PATCH 3/6] Add options for sphinx copybutton extension --- docs/source/conf.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index e246e5ed..78cd9d2a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -44,9 +44,7 @@ } intersphinx_disabled_domains = ['std'] - templates_path = ['_templates'] - rst_prolog = """ .. include:: @@ -55,15 +53,17 @@ # -- Options for HTML output html_theme = 'sphinx_rtd_theme' - html_logo = '_static/data/skrl-up.png' - html_favicon = "_static/data/favicon.ico" - html_static_path = ['_static'] - html_css_files = ['css/s5defs-roles.css', 'css/skrl.css'] # -- Options for EPUB output + epub_show_urls = 'footnote' + +# -- Options for copybutton ext + +copybutton_prompt_text = r'>>> |\.\.\. ' +copybutton_prompt_is_regexp = True From 6072abf08c31bfca7375366b46afbc4b2c50196d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 26 Jan 2023 10:05:11 +0100 Subject: [PATCH 4/6] Increase PATCH version --- skrl/version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skrl/version.txt b/skrl/version.txt index 78bc1abd..57121573 100644 --- a/skrl/version.txt +++ b/skrl/version.txt @@ -1 +1 @@ -0.10.0 +0.10.1 From 2e1541148d3fbc6cc0ffaa2ee51cccf61744bee8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 26 Jan 2023 10:06:08 +0100 Subject: [PATCH 5/6] Update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d481ec9..57bb6255 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [0.10.1] - 2023-01-26 +### Fixed +- Tensorboard writer instantiation when `write_interval` is zero + ## [0.10.0] - 2023-01-22 ### Added - Isaac Orbit environment loader From ae3ce502618ce1400c62c0212bc081254e1c94b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 26 Jan 2023 10:21:24 +0100 Subject: [PATCH 6/6] Update Isaac Orbit example hyperparameters --- docs/source/examples/isaacorbit/ppo_reach_franka.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/examples/isaacorbit/ppo_reach_franka.py b/docs/source/examples/isaacorbit/ppo_reach_franka.py index c00c6ba7..ae519ed2 100644 --- a/docs/source/examples/isaacorbit/ppo_reach_franka.py +++ b/docs/source/examples/isaacorbit/ppo_reach_franka.py @@ -80,7 +80,7 @@ def compute(self, inputs, role): cfg_ppo["lambda"] = 0.95 cfg_ppo["learning_rate"] = 3e-4 cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL -cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.01} cfg_ppo["random_timesteps"] = 0 cfg_ppo["learning_starts"] = 0 cfg_ppo["grad_norm_clip"] = 1.0 @@ -90,14 +90,14 @@ def compute(self, inputs, role): cfg_ppo["entropy_loss_scale"] = 0.0 cfg_ppo["value_loss_scale"] = 2.0 cfg_ppo["kl_threshold"] = 0 -cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 +cfg_ppo["rewards_shaper"] = None cfg_ppo["state_preprocessor"] = RunningStandardScaler cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} cfg_ppo["value_preprocessor"] = RunningStandardScaler cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} -# logging to TensorBoard and write checkpoints each 40 and 400 timesteps respectively -cfg_ppo["experiment"]["write_interval"] = 40 -cfg_ppo["experiment"]["checkpoint_interval"] = 400 +# logging to TensorBoard and write checkpoints each 80 and 800 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 80 +cfg_ppo["experiment"]["checkpoint_interval"] = 800 agent = PPO(models=models_ppo, memory=memory, @@ -108,7 +108,7 @@ def compute(self, inputs, role): # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 8000, "headless": True} +cfg_trainer = {"timesteps": 16000, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training