
Commit

Merge pull request #54 from Toni-SM/develop
Develop
Toni-SM committed Jan 26, 2023
2 parents 930b8d7 + ea7bf7a commit b760dc7
Showing 6 changed files with 20 additions and 15 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,10 @@

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

+## [0.10.1] - 2023-01-26
+### Fixed
+- Tensorboard writer instantiation when `write_interval` is zero

## [0.10.0] - 2023-01-22
### Added
- Isaac Orbit environment loader
2 changes: 1 addition & 1 deletion docs/source/_static/imgs/wrapping.svg
12 changes: 6 additions & 6 deletions docs/source/conf.py
@@ -44,9 +44,7 @@
}

intersphinx_disabled_domains = ['std']

templates_path = ['_templates']

rst_prolog = """
.. include:: <s5defs.txt>
@@ -55,15 +53,17 @@
# -- Options for HTML output

html_theme = 'sphinx_rtd_theme'

html_logo = '_static/data/skrl-up.png'

html_favicon = "_static/data/favicon.ico"

html_static_path = ['_static']

html_css_files = ['css/s5defs-roles.css',
                  'css/skrl.css']

# -- Options for EPUB output

epub_show_urls = 'footnote'

# -- Options for copybutton ext

copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True
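
As an aside (illustration only; not part of this commit's changes): the copybutton_prompt_text regex above is what lets sphinx-copybutton strip ">>> " and "... " prompts from doctest-style snippets before they are copied. A rough sketch of the matching behavior using plain re (a hypothetical stand-alone example; the real stripping happens on the rendered HTML page):

import re

prompt = re.compile(r'>>> |\.\.\. ')  # same pattern as copybutton_prompt_text above

for line in [">>> total = 1 + 1", "... total *= 2", "print(total)"]:
    print(prompt.sub("", line, count=1))
# prints:
# total = 1 + 1
# total *= 2
# print(total)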
12 changes: 6 additions & 6 deletions docs/source/examples/isaacorbit/ppo_reach_franka.py
@@ -80,7 +80,7 @@ def compute(self, inputs, role):
cfg_ppo["lambda"] = 0.95
cfg_ppo["learning_rate"] = 3e-4
cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL
cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008}
cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.01}
cfg_ppo["random_timesteps"] = 0
cfg_ppo["learning_starts"] = 0
cfg_ppo["grad_norm_clip"] = 1.0
@@ -90,14 +90,14 @@ def compute(self, inputs, role):
cfg_ppo["entropy_loss_scale"] = 0.0
cfg_ppo["value_loss_scale"] = 2.0
cfg_ppo["kl_threshold"] = 0
cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01
cfg_ppo["rewards_shaper"] = None
cfg_ppo["state_preprocessor"] = RunningStandardScaler
cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device}
cfg_ppo["value_preprocessor"] = RunningStandardScaler
cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device}
-# logging to TensorBoard and write checkpoints each 40 and 400 timesteps respectively
-cfg_ppo["experiment"]["write_interval"] = 40
-cfg_ppo["experiment"]["checkpoint_interval"] = 400
+# logging to TensorBoard and write checkpoints each 80 and 800 timesteps respectively
+cfg_ppo["experiment"]["write_interval"] = 80
+cfg_ppo["experiment"]["checkpoint_interval"] = 800

agent = PPO(models=models_ppo,
            memory=memory,
@@ -108,7 +108,7 @@ def compute(self, inputs, role):


# Configure and instantiate the RL trainer
cfg_trainer = {"timesteps": 8000, "headless": True}
cfg_trainer = {"timesteps": 16000, "headless": True}
trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent)

# start training
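
For context (an observation from the values in this diff, not a statement in the commit message): the run length doubles from 8000 to 16000 timesteps, and the write and checkpoint intervals double with it, so the logging and checkpointing cadence is unchanged. A quick check, using hypothetical variable names and the values shown above:

old = {"timesteps": 8000,  "write_interval": 40, "checkpoint_interval": 400}
new = {"timesteps": 16000, "write_interval": 80, "checkpoint_interval": 800}

for cfg in (old, new):
    print(cfg["timesteps"] // cfg["write_interval"],       # 200 TensorBoard writes
          cfg["timesteps"] // cfg["checkpoint_interval"])   # 20 checkpoints
# both iterations print: 200 20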
3 changes: 2 additions & 1 deletion skrl/agents/torch/base.py
@@ -153,7 +153,8 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
wandb.init(**wandb_kwargs)

# main entry to log data for consumption and visualization by TensorBoard
-self.writer = SummaryWriter(log_dir=self.experiment_dir)
+if self.write_interval > 0:
+    self.writer = SummaryWriter(log_dir=self.experiment_dir)

if self.checkpoint_interval > 0:
    os.makedirs(os.path.join(self.experiment_dir, "checkpoints"), exist_ok=True)
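
A minimal usage sketch of the fix (an assumption based on standard skrl 0.10.x configuration keys and the PPO example elsewhere in this commit; not part of the diff itself): with the guard above, setting the experiment's write_interval to 0 means no SummaryWriter is created and no TensorBoard event files are written, while checkpointing still works independently.

import copy

from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG

cfg_ppo = copy.deepcopy(PPO_DEFAULT_CONFIG)
cfg_ppo["experiment"]["write_interval"] = 0         # disable TensorBoard logging entirely
cfg_ppo["experiment"]["checkpoint_interval"] = 800  # checkpoints are still saved

# agent construction as in the example script (models, memory, env not defined here):
# agent = PPO(models=models_ppo, memory=memory, cfg=cfg_ppo,
#             observation_space=env.observation_space,
#             action_space=env.action_space, device=device)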
2 changes: 1 addition & 1 deletion skrl/version.txt
@@ -1 +1 @@
-0.10.0
+0.10.1
