From c588b47bf416756df8b9ab38ae125a2eb750c513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 13 Jul 2022 23:28:48 +0200 Subject: [PATCH 001/108] Update Omniverse Isaac Gym examples --- .../examples/omniisaacgym/ppo_allegro_hand.py | 5 +++++ docs/source/examples/omniisaacgym/ppo_ant.py | 5 +++++ docs/source/examples/omniisaacgym/ppo_ant_mt.py | 5 +++++ docs/source/examples/omniisaacgym/ppo_cartpole.py | 5 +++++ .../examples/omniisaacgym/ppo_cartpole_mt.py | 5 +++++ docs/source/examples/omniisaacgym/ppo_humanoid.py | 5 +++++ .../examples/omniisaacgym/ppo_shadow_hand.py | 5 +++++ docs/source/intro/examples.rst | 14 +++++++------- 8 files changed, 42 insertions(+), 7 deletions(-) diff --git a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py index 04fd3a3b..02e06628 100644 --- a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py @@ -6,6 +6,7 @@ from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env from skrl.envs.torch import load_omniverse_isaacgym_env @@ -97,6 +98,10 @@ def compute(self, states, taken_actions): cfg_ppo["value_loss_scale"] = 2.0 cfg_ppo["kl_threshold"] = 0 cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} # logging to TensorBoard and write checkpoints each 200 and 2000 timesteps respectively cfg_ppo["experiment"]["write_interval"] = 200 cfg_ppo["experiment"]["checkpoint_interval"] = 2000 diff --git a/docs/source/examples/omniisaacgym/ppo_ant.py b/docs/source/examples/omniisaacgym/ppo_ant.py index 2244d1be..dc446032 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant.py +++ b/docs/source/examples/omniisaacgym/ppo_ant.py @@ -6,6 +6,7 @@ from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env from skrl.envs.torch import load_omniverse_isaacgym_env @@ -97,6 +98,10 @@ def compute(self, states, taken_actions): cfg_ppo["value_loss_scale"] = 1.0 cfg_ppo["kl_threshold"] = 0 cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} # logging to TensorBoard and write checkpoints each 40 and 400 timesteps respectively cfg_ppo["experiment"]["write_interval"] = 40 cfg_ppo["experiment"]["checkpoint_interval"] = 400 diff --git a/docs/source/examples/omniisaacgym/ppo_ant_mt.py b/docs/source/examples/omniisaacgym/ppo_ant_mt.py index 3aea2fb5..e576af3b 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant_mt.py +++ 
b/docs/source/examples/omniisaacgym/ppo_ant_mt.py @@ -8,6 +8,7 @@ from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env from skrl.envs.torch import load_omniverse_isaacgym_env @@ -99,6 +100,10 @@ def compute(self, states, taken_actions): cfg_ppo["value_loss_scale"] = 1.0 cfg_ppo["kl_threshold"] = 0 cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} # logging to TensorBoard and write checkpoints each 40 and 400 timesteps respectively cfg_ppo["experiment"]["write_interval"] = 40 cfg_ppo["experiment"]["checkpoint_interval"] = 400 diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole.py b/docs/source/examples/omniisaacgym/ppo_cartpole.py index 034b31c0..096e67a9 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole.py @@ -6,6 +6,7 @@ from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env from skrl.envs.torch import load_omniverse_isaacgym_env @@ -93,6 +94,10 @@ def compute(self, states, taken_actions): cfg_ppo["value_loss_scale"] = 2.0 cfg_ppo["kl_threshold"] = 0 cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.1 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} # logging to TensorBoard and write checkpoints each 16 and 80 timesteps respectively cfg_ppo["experiment"]["write_interval"] = 16 cfg_ppo["experiment"]["checkpoint_interval"] = 80 diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py index 888caaeb..8f1928e6 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py @@ -8,6 +8,7 @@ from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env from skrl.envs.torch import load_omniverse_isaacgym_env @@ -95,6 +96,10 @@ def compute(self, states, taken_actions): cfg_ppo["value_loss_scale"] = 2.0 cfg_ppo["kl_threshold"] = 0 cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.1 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} # logging to TensorBoard 
and write checkpoints each 16 and 80 timesteps respectively cfg_ppo["experiment"]["write_interval"] = 16 cfg_ppo["experiment"]["checkpoint_interval"] = 80 diff --git a/docs/source/examples/omniisaacgym/ppo_humanoid.py b/docs/source/examples/omniisaacgym/ppo_humanoid.py index ad9cd876..9fd60330 100644 --- a/docs/source/examples/omniisaacgym/ppo_humanoid.py +++ b/docs/source/examples/omniisaacgym/ppo_humanoid.py @@ -6,6 +6,7 @@ from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env from skrl.envs.torch import load_omniverse_isaacgym_env @@ -97,6 +98,10 @@ def compute(self, states, taken_actions): cfg_ppo["value_loss_scale"] = 2.0 cfg_ppo["kl_threshold"] = 0 cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} # logging to TensorBoard and write checkpoints each 160 and 1600 timesteps respectively cfg_ppo["experiment"]["write_interval"] = 160 cfg_ppo["experiment"]["checkpoint_interval"] = 1600 diff --git a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py index 90648118..e7b66041 100644 --- a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py @@ -6,6 +6,7 @@ from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env from skrl.envs.torch import load_omniverse_isaacgym_env @@ -101,6 +102,10 @@ def compute(self, states, taken_actions): cfg_ppo["value_loss_scale"] = 2.0 cfg_ppo["kl_threshold"] = 0 cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} # logging to TensorBoard and write checkpoints each 200 and 2000 timesteps respectively cfg_ppo["experiment"]["write_interval"] = 200 cfg_ppo["experiment"]["checkpoint_interval"] = 2000 diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index a9c48ad1..bf3cd29b 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -608,7 +608,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_allegro_hand.py :language: python :linenos: - :emphasize-lines: 10-11, 57-58 + :emphasize-lines: 11-12, 58-59 .. tab:: Ant @@ -617,7 +617,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_ant.py :language: python :linenos: - :emphasize-lines: 10-11, 57-58 + :emphasize-lines: 11-12, 58-59 .. 
tab:: Ant (multi-threaded) @@ -626,7 +626,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_ant_mt.py :language: python :linenos: - :emphasize-lines: 1, 12-13, 59-60, 119, 123 + :emphasize-lines: 1, 13-14, 60-61, 124, 128 .. tab:: Cartpole @@ -635,7 +635,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_cartpole.py :language: python :linenos: - :emphasize-lines: 10-11, 53-54 + :emphasize-lines: 11-12, 54-55 .. tab:: Cartpole (multi-threaded) @@ -644,7 +644,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_cartpole_mt.py :language: python :linenos: - :emphasize-lines: 1, 12-13, 55-56, 115, 119 + :emphasize-lines: 1, 13-14, 56-57, 120, 124 .. tab:: Humanoid @@ -653,7 +653,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_humanoid.py :language: python :linenos: - :emphasize-lines: 10-11, 57-58 + :emphasize-lines: 11-12, 58-59 .. tab:: ShadowHand @@ -662,7 +662,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_shadow_hand.py :language: python :linenos: - :emphasize-lines: 10-11, 61-62 + :emphasize-lines: 11-12, 62-63 .. raw:: html From c7e44c73d18097142af065acaa15392b8f4ea88f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 24 Jul 2022 23:49:50 +0200 Subject: [PATCH 002/108] Add AMP agent --- skrl/agents/torch/amp/__init__.py | 1 + skrl/agents/torch/amp/amp.py | 553 ++++++++++++++++++++++++++++++ 2 files changed, 554 insertions(+) create mode 100644 skrl/agents/torch/amp/__init__.py create mode 100644 skrl/agents/torch/amp/amp.py diff --git a/skrl/agents/torch/amp/__init__.py b/skrl/agents/torch/amp/__init__.py new file mode 100644 index 00000000..9a6ca76e --- /dev/null +++ b/skrl/agents/torch/amp/__init__.py @@ -0,0 +1 @@ +from .amp import AMP, AMP_DEFAULT_CONFIG \ No newline at end of file diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py new file mode 100644 index 00000000..e2befc77 --- /dev/null +++ b/skrl/agents/torch/amp/amp.py @@ -0,0 +1,553 @@ +from typing import Callable, Union, Tuple, Dict, Any + +import gym +import math +import copy +import itertools + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ....memories.torch import Memory +from ....models.torch import Model + +from .. import Agent + + +AMP_DEFAULT_CONFIG = { + "rollouts": 16, # number of rollouts before updating + "learning_epochs": 6, # number of learning epochs during each update + "mini_batches": 2, # number of mini batches during each learning epoch + + "discount_factor": 0.99, # discount factor (gamma) + "lambda": 0.95, # TD(lambda) coefficient (lam) for computing returns and advantages + + "learning_rate": 5e-5, # learning rate + "discriminator_learning_rate": 5e-5, # discriminator learning rate + "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) + "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. {"step_size": 1e-3}) + + "state_preprocessor": None, # state preprocessor class (see skrl.resources.preprocessors) + "state_preprocessor_kwargs": {}, # state preprocessor's kwargs (e.g. 
{"size": env.observation_space}) + "value_preprocessor": None, # value preprocessor class (see skrl.resources.preprocessors) + "value_preprocessor_kwargs": {}, # value preprocessor's kwargs (e.g. {"size": 1}) + "amp_state_preprocessor": None, # AMP state preprocessor class (see skrl.resources.preprocessors) + "amp_state_preprocessor_kwargs": {}, # AMP state preprocessor's kwargs (e.g. {"size": env.amp_observation_space}) + + "random_timesteps": 0, # random exploration steps + "learning_starts": 0, # learning starts after this many steps + + "grad_norm_clip": 0.0, # clipping coefficient for the norm of the gradients + "ratio_clip": 0.2, # clipping coefficient for computing the clipped surrogate objective + "value_clip": 0.2, # clipping coefficient for computing the value loss (if clip_predicted_values is True) + "clip_predicted_values": False, # clip predicted values during value loss computation + + "entropy_loss_scale": 0.0, # entropy loss scaling factor + "value_loss_scale": 2.5, # value loss scaling factor + "discriminator_loss_scale": 5.0, # discriminator loss scaling factor + + "amp_batch_size": 512, # batch size for updating the reference motion dataset + "task_reward_weight": 0.0, # task-reward weight (wG) + "style_reward_weight": 1.0, # style-reward weight (wS) + "discriminator_reward_scale": 2, # discriminator reward scaling factor + "discriminator_logit_regularization_scale": 0.05, # logit regularization scale factor for the discriminator loss + "discriminator_gradient_penalty_scale": 5, # gradient penalty scaling factor for the discriminator loss + "discriminator_weight_decay_scale": 0.0001, # weight decay scaling factor for the discriminator loss + + "rewards_shaper": None, # rewards shaping function: Callable(reward, timestep, timesteps) -> reward + + "experiment": { + "directory": "", # experiment's parent directory + "experiment_name": "", # experiment name + "write_interval": 250, # TensorBoard writing interval (timesteps) + + "checkpoint_interval": 1000, # interval for checkpoints (timesteps) + "checkpoint_policy_only": True, # checkpoint for policy only + } +} + + +class AMP(Agent): + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", + cfg: dict = {}, + amp_observation_space: Union[int, Tuple[int], gym.Space, None] = None, + motion_dataset: Union[Memory, None] = None, + reply_buffer: Union[Memory, None] = None, + collect_reference_motions: Union[Callable[[int], torch.Tensor], None] = None, + collect_observation: Union[Callable[[], torch.Tensor], None] = None) -> None: + """Adversarial Motion Priors (AMP) + + https://arxiv.org/abs/2104.02180 + + The implementation is adapted from the NVIDIA IsaacGymEnvs + (https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/isaacgymenvs/learning/amp_continuous.py) + + :param models: Models used by the agent + :type models: dictionary of skrl.models.torch.Model + :param memory: Memory to storage the transitions. 
+ If it is a tuple, the first element will be used for training and + for the rest only the environment transitions will be added + :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None + :param observation_space: Observation/state space or shape (default: None) + :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :param action_space: Action space or shape (default: None) + :type action_space: int, tuple or list of integers, gym.Space or None, optional + :param device: Computing device (default: "cuda:0") + :type device: str or torch.device, optional + :param cfg: Configuration dictionary + :type cfg: dict + :param amp_observation_space: AMP observation/state space or shape (default: None) + :type amp_observation_space: int, tuple or list of integers, gym.Space or None + :param motion_dataset: Reference motion dataset: M (default: None) + :type motion_dataset: skrl.memory.torch.Memory or None + :param reply_buffer: Reply buffer for preventing discriminator overfitting: B (default: None) + :type reply_buffer: skrl.memory.torch.Memory or None + :param collect_reference_motions: Callable to collect reference motions (default: None) + :type collect_reference_motions: Callable[[int], torch.Tensor] or None + :param collect_observation: Callable to collect observation (default: None) + :type collect_observation: Callable[[], torch.Tensor] or None + + :raises KeyError: If the models dictionary is missing a required key + """ + _cfg = copy.deepcopy(AMP_DEFAULT_CONFIG) + _cfg.update(cfg) + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, + cfg=_cfg) + + self.amp_observation_space = amp_observation_space + self.motion_dataset = motion_dataset + self.reply_buffer = reply_buffer + self.collect_reference_motions = collect_reference_motions + self.collect_observation = collect_observation + + # models + self.policy = self.models.get("policy", None) + self.value = self.models.get("value", None) + self.discriminator = self.models.get("discriminator", None) + + # checkpoint models + self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + + # configuration + self._learning_epochs = self.cfg["learning_epochs"] + self._mini_batches = self.cfg["mini_batches"] + self._rollouts = self.cfg["rollouts"] + self._rollout = 0 + + self._grad_norm_clip = self.cfg["grad_norm_clip"] + self._ratio_clip = self.cfg["ratio_clip"] + self._value_clip = self.cfg["value_clip"] + self._clip_predicted_values = self.cfg["clip_predicted_values"] + + self._value_loss_scale = self.cfg["value_loss_scale"] + self._entropy_loss_scale = self.cfg["entropy_loss_scale"] + self._discriminator_loss_scale = self.cfg["discriminator_loss_scale"] + + self._learning_rate = self.cfg["learning_rate"] + self._discriminator_learning_rate = self.cfg["discriminator_learning_rate"] + self._learning_rate_scheduler = self.cfg["learning_rate_scheduler"] + + self._state_preprocessor = self.cfg["state_preprocessor"] + self._value_preprocessor = self.cfg["value_preprocessor"] + self._amp_state_preprocessor = self.cfg["amp_state_preprocessor"] + + self._discount_factor = self.cfg["discount_factor"] + self._lambda = self.cfg["lambda"] + + self._random_timesteps = self.cfg["random_timesteps"] + self._learning_starts = self.cfg["learning_starts"] + + self._amp_batch_size = self.cfg["amp_batch_size"] + self._task_reward_weight = self.cfg["task_reward_weight"] + 
self._style_reward_weight = self.cfg["style_reward_weight"] + + self._discriminator_reward_scale = self.cfg["discriminator_reward_scale"] + self._discriminator_logit_regularization_scale = self.cfg["discriminator_logit_regularization_scale"] + self._discriminator_gradient_penalty_scale = self.cfg["discriminator_gradient_penalty_scale"] + self._discriminator_weight_decay_scale = self.cfg["discriminator_weight_decay_scale"] + + self._rewards_shaper = self.cfg["rewards_shaper"] + + # set up optimizer and learning rate scheduler + if self.policy is not None and self.value is not None and self.discriminator is not None: + self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), + self.value.parameters(), + self.discriminator.parameters()), + lr=self._learning_rate) + if self._learning_rate_scheduler is not None: + self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + + # set up preprocessors + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ + else self._empty_preprocessor + self._value_preprocessor = self._value_preprocessor(**self.cfg["value_preprocessor_kwargs"]) if self._value_preprocessor \ + else self._empty_preprocessor + self._amp_state_preprocessor = self._amp_state_preprocessor(**self.cfg["amp_state_preprocessor_kwargs"]) \ + if self._amp_state_preprocessor else self._empty_preprocessor + + def init(self) -> None: + """Initialize the agent + """ + super().init() + self.set_mode("eval") + + # create tensors in memory + if self.memory is not None: + self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) + self.memory.create_tensor(name="next_states", size=self.observation_space, dtype=torch.float32) + self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) + self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) + self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="log_prob", size=self.action_space, dtype=torch.float32) + self.memory.create_tensor(name="values", size=1, dtype=torch.float32) + self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) + self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) + + self.memory.create_tensor(name="amp_states", size=self.amp_observation_space, dtype=torch.float32) + self.memory.create_tensor(name="next_values", size=1, dtype=torch.float32) + + self.tensors_names = ["states", "actions", "rewards", "next_states", "dones", \ + "log_prob", "values", "returns", "advantages", "amp_states", "next_values"] + + # create tensors for motion dataset and reply buffer + self.motion_dataset.create_tensor(name="states", size=self.amp_observation_space, dtype=torch.float32) + self.reply_buffer.create_tensor(name="states", size=self.amp_observation_space, dtype=torch.float32) + + # initialize motion dataset + for _ in range(math.ceil(self.motion_dataset.memory_size / self._amp_batch_size)): + self.motion_dataset.add_samples(states=self.collect_reference_motions(self._amp_batch_size)) + + # create temporary variables needed for storage and computation + self._current_log_prob = None + self._current_states = None + + def act(self, + states: torch.Tensor, + timestep: int, + timesteps: int, + inference: bool = False) -> torch.Tensor: + """Process the environment's states to make a decision (actions) using the main policy + + :param states: Environment's 
states + :type states: torch.Tensor + :param timestep: Current timestep + :type timestep: int + :param timesteps: Number of timesteps + :type timesteps: int + :param inference: Flag to indicate whether the model is making inference + :type inference: bool + + :return: Actions + :rtype: torch.Tensor + """ + # use collected states + if self._current_states is not None: + states = self._current_states + + states = self._state_preprocessor(states) + + # sample random actions + # TODO, check for stochasticity + if timestep < self._random_timesteps: + return self.policy.random_act(states) + + # sample stochastic actions + actions, log_prob, actions_mean = self.policy.act(states, inference=inference) + self._current_log_prob = log_prob + + return actions, log_prob, actions_mean + + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, + timesteps: int) -> None: + """Record an environment transition in memory + + :param states: Observations/states of the environment used to make the decision + :type states: torch.Tensor + :param actions: Actions taken by the agent + :type actions: torch.Tensor + :param rewards: Instant rewards achieved by the current actions + :type rewards: torch.Tensor + :param next_states: Next observations/states of the environment + :type next_states: torch.Tensor + :param dones: Signals to indicate that episodes have ended + :type dones: torch.Tensor + :param infos: Additional information about the environment + :type infos: Any type supported by the environment + :param timestep: Current timestep + :type timestep: int + :param timesteps: Number of timesteps + :type timesteps: int + """ + # use collected states + if self._current_states is not None: + states = self._current_states + + super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + + # reward shaping + if self._rewards_shaper is not None: + rewards = self._rewards_shaper(rewards, timestep, timesteps) + + amp_states = infos["amp_obs"] + + if self.memory is not None: + values, _, _ = self.value.act(states=self._state_preprocessor(states), inference=True) + values = self._value_preprocessor(values, inverse=True) + + next_values, _, _ = self.value.act(states=self._state_preprocessor(next_states), inference=True) + next_values = self._value_preprocessor(next_values, inverse=True) + next_values *= infos['terminate'].view(-1, 1).logical_not() + + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + log_prob=self._current_log_prob, values=values, amp_states=amp_states, next_values=next_values) + for memory in self.secondary_memories: + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + log_prob=self._current_log_prob, values=values, amp_states=amp_states, next_values=next_values) + + def pre_interaction(self, timestep: int, timesteps: int) -> None: + """Callback called before the interaction with the environment + + :param timestep: Current timestep + :type timestep: int + :param timesteps: Number of timesteps + :type timesteps: int + """ + if self.collect_observation is not None: + self._current_states = self.collect_observation() + + def post_interaction(self, timestep: int, timesteps: int) -> None: + """Callback called after the interaction with the environment + + :param timestep: Current timestep + :type timestep: int + :param 
timesteps: Number of timesteps + :type timesteps: int + """ + self._rollout += 1 + if not self._rollout % self._rollouts and timestep >= self._learning_starts: + self.set_mode("train") + self._update(timestep, timesteps) + self.set_mode("eval") + + # write tracking data and checkpoints + super().post_interaction(timestep, timesteps) + + def _update(self, timestep: int, timesteps: int) -> None: + """Algorithm's main update step + + :param timestep: Current timestep + :type timestep: int + :param timesteps: Number of timesteps + :type timesteps: int + """ + def compute_gae(rewards: torch.Tensor, + dones: torch.Tensor, + values: torch.Tensor, + next_values: torch.Tensor, + discount_factor: float = 0.99, + lambda_coefficient: float = 0.95) -> torch.Tensor: + """Compute the Generalized Advantage Estimator (GAE) + + :param rewards: Rewards obtained by the agent + :type rewards: torch.Tensor + :param dones: Signals to indicate that episodes have ended + :type dones: torch.Tensor + :param values: Values obtained by the agent + :type values: torch.Tensor + :param next_values: Next values obtained by the agent + :type next_values: torch.Tensor + :param discount_factor: Discount factor + :type discount_factor: float + :param lambda_coefficient: Lambda coefficient + :type lambda_coefficient: float + + :return: Generalized Advantage Estimator + :rtype: torch.Tensor + """ + advantage = 0 + advantages = torch.zeros_like(rewards) + not_dones = dones.logical_not() + memory_size = rewards.shape[0] + + # advantages computation + for i in reversed(range(memory_size)): + advantage = rewards[i] - values[i] + discount_factor * (next_values[i] + lambda_coefficient * not_dones[i] * advantage) + advantages[i] = advantage + # returns computation + returns = advantages + values + # normalize advantages + advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) + + return returns, advantages + + # update dataset of reference motions + self.motion_dataset.add_samples(states=self.collect_reference_motions(self._amp_batch_size)) + + # compute combined rewards + rewards = self.memory.get_tensor_by_name("rewards") + amp_states = self.memory.get_tensor_by_name("amp_states") + + with torch.no_grad(): + amp_logits, _, _ = self.discriminator.act(self._amp_state_preprocessor(amp_states)) + style_reward = -torch.log(torch.maximum(1 - 1 / (1 + torch.exp(-amp_logits)), torch.tensor(0.0001, device=self.device))) + style_reward *= self._discriminator_reward_scale + + combined_rewards = self._task_reward_weight * rewards + self._style_reward_weight * style_reward + + # compute returns and advantages + values = self.memory.get_tensor_by_name("values") + next_values=self.memory.get_tensor_by_name("next_values") + returns, advantages = compute_gae(rewards=combined_rewards, + dones=self.memory.get_tensor_by_name("dones"), + values=values, + next_values=next_values, + discount_factor=self._discount_factor, + lambda_coefficient=self._lambda) + + self.memory.set_tensor_by_name("values", self._value_preprocessor(values, train=True)) + self.memory.set_tensor_by_name("returns", self._value_preprocessor(returns, train=True)) + self.memory.set_tensor_by_name("advantages", advantages) + + # sample mini-batches from memory + sampled_batches = self.memory.sample_all(names=self.tensors_names, mini_batches=self._mini_batches) + sampled_motion_batches = self.motion_dataset.sample(names=["states"], + batch_size=self.memory.memory_size * self.memory.num_envs, + mini_batches=self._mini_batches) + if len(self.reply_buffer): + 
sampled_replay_batches = self.reply_buffer.sample(names=["states"], + batch_size=self.memory.memory_size * self.memory.num_envs, + mini_batches=self._mini_batches) + else: + sampled_replay_batches = [[batches[self.tensors_names.index("amp_states")]] for batches in sampled_batches] + + cumulative_policy_loss = 0 + cumulative_entropy_loss = 0 + cumulative_value_loss = 0 + cumulative_discriminator_loss = 0 + + # learning epochs + for epoch in range(self._learning_epochs): + + # mini-batches loop + for batch_index, (sampled_states, sampled_actions, _, _, _, \ + sampled_log_prob, sampled_values, sampled_returns, sampled_advantages, \ + sampled_amp_states, _) in enumerate(sampled_batches): + + sampled_states = self._state_preprocessor(sampled_states, train=True) + + _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions) + + # compute entropy loss + if self._entropy_loss_scale: + entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy().mean() + else: + entropy_loss = 0 + + # compute policy loss + ratio = torch.exp(next_log_prob - sampled_log_prob) + surrogate = sampled_advantages * ratio + surrogate_clipped = sampled_advantages * torch.clip(ratio, 1.0 - self._ratio_clip, 1.0 + self._ratio_clip) + + policy_loss = -torch.min(surrogate, surrogate_clipped).mean() + + # compute value loss + predicted_values, _, _ = self.value.act(states=sampled_states) + + if self._clip_predicted_values: + predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, + min=-self._value_clip, + max=self._value_clip) + value_loss = self._value_loss_scale * F.mse_loss(sampled_returns, predicted_values) + + # compute discriminator loss + amp_logits, _, _ = self.discriminator.act(states=self._amp_state_preprocessor(sampled_amp_states[0:4096], train=True)) + + amp_replay_logits, _, _ = self.discriminator.act(states=self._amp_state_preprocessor(sampled_replay_batches[batch_index][0][0:4096], train=True)) + + sampled_amp_motion_states = self._amp_state_preprocessor(sampled_motion_batches[batch_index][0][0:4096], train=True) + sampled_amp_motion_states.requires_grad_(True) + amp_motion_logits, _, _ = self.discriminator.act(states=sampled_amp_motion_states) + + amp_cat_logits = torch.cat([amp_logits, amp_replay_logits], dim=0) + + # discriminator prediction loss + discriminator_loss = 0.5 * (nn.BCEWithLogitsLoss()(amp_cat_logits, torch.zeros_like(amp_cat_logits)) \ + + torch.nn.BCEWithLogitsLoss()(amp_motion_logits, torch.ones_like(amp_motion_logits))) + + # discriminator logit regularization + if self._discriminator_logit_regularization_scale: + logit_weights = torch.flatten(list(self.discriminator.modules())[-1].weight) + discriminator_loss += self._discriminator_logit_regularization_scale * torch.sum(torch.square(logit_weights)) + + # discriminator gradient penalty + if self._discriminator_gradient_penalty_scale: + amp_motion_gradient = torch.autograd.grad(amp_motion_logits, + sampled_amp_motion_states, + grad_outputs=torch.ones_like(amp_motion_logits), + create_graph=True, + retain_graph=True, + only_inputs=True) + gradient_penalty = torch.sum(torch.square(amp_motion_gradient[0]), dim=-1).mean() + discriminator_loss += self._discriminator_gradient_penalty_scale * gradient_penalty + + # discriminator weight decay + if self._discriminator_weight_decay_scale: + weights = [torch.flatten(module.weight) for module in self.discriminator.modules() \ + if isinstance(module, torch.nn.Linear)] + weight_decay = torch.sum(torch.square(torch.cat(weights, dim=-1))) + 
discriminator_loss += self._discriminator_weight_decay_scale * weight_decay + + discriminator_loss *= self._discriminator_loss_scale + + # optimization step + self.optimizer.zero_grad() + (policy_loss + entropy_loss + value_loss + discriminator_loss).backward() + if self._grad_norm_clip > 0: + nn.utils.clip_grad_norm_(itertools.chain(self.policy.parameters(), + self.value.parameters(), + self.discriminator.parameters()), + max_norm=self._grad_norm_clip) + self.optimizer.step() + + # update cumulative losses + cumulative_policy_loss += policy_loss.item() + cumulative_value_loss += value_loss.item() + if self._entropy_loss_scale: + cumulative_entropy_loss += entropy_loss.item() + cumulative_discriminator_loss += discriminator_loss.item() + + # update learning rate + if self._learning_rate_scheduler: + self.scheduler.step() + + # update AMP replay buffer + self.reply_buffer.add_samples(states=amp_states) + + # record data + self.track_data("Loss / Policy loss", cumulative_policy_loss / (self._learning_epochs * self._mini_batches)) + self.track_data("Loss / Value loss", cumulative_value_loss / (self._learning_epochs * self._mini_batches)) + if self._entropy_loss_scale: + self.track_data("Loss / Entropy loss", cumulative_entropy_loss / (self._learning_epochs * self._mini_batches)) + self.track_data("Loss / Discriminator loss", cumulative_discriminator_loss / (self._learning_epochs * self._mini_batches)) + + self.track_data("Policy / Standard deviation", self.policy.distribution().stddev.mean().item()) + + if self._learning_rate_scheduler: + self.track_data("Learning / Learning rate", self.scheduler.get_last_lr()[0]) From 45df8389944dc4ebd6497266df97c9e2f869182c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 24 Jul 2022 23:54:04 +0200 Subject: [PATCH 003/108] Add AMP agent to docs --- docs/source/index.rst | 2 + docs/source/modules/skrl.agents.amp.rst | 155 ++++++++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 docs/source/modules/skrl.agents.amp.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 9245c30e..69dc9550 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -56,6 +56,7 @@ Agents Definition of reinforcement learning algorithms that compute an optimal policy. All agents inherit from one and only one :doc:`base class ` (that defines a uniform interface and provides for common functionalities) but which is not tied to the implementation details of the algorithms * :doc:`Advantage Actor Critic ` (**A2C**) + * :doc:`Adversarial Motion Priors ` (**AMP**) * :doc:`Cross-Entropy Method ` (**CEM**) * :doc:`Deep Deterministic Policy Gradient ` (**DDPG**) * :doc:`Double Deep Q-Network ` (**DDQN**) @@ -74,6 +75,7 @@ Agents modules/skrl.agents.base_class A2C + AMP CEM DDPG DDQN diff --git a/docs/source/modules/skrl.agents.amp.rst b/docs/source/modules/skrl.agents.amp.rst new file mode 100644 index 00000000..e8e7c228 --- /dev/null +++ b/docs/source/modules/skrl.agents.amp.rst @@ -0,0 +1,155 @@ +Adversarial Motion Priors (AMP) +=============================== + +AMP is a **model-free**, **stochastic** **on-policy** **policy gradient** algorithm (trained using a combination of GAIL and PPO) for adversarial learning of physics-based character animation. 
It enables characters to imitate diverse behaviors from large unstructured datasets, without the need for motion planners or other mechanisms for clip selection + +Paper: `AMP: Adversarial Motion Priors for Stylized Physics-Based Character Control `_ + +Algorithm implementation +^^^^^^^^^^^^^^^^^^^^^^^^ + +| Main notation/symbols: +| - policy (:math:`\pi_\theta`), value (:math:`V_\phi`) and discriminator (:math:`D_\psi`) function approximators +| - states (:math:`s`), actions (:math:`a`), rewards (:math:`r`), next states (:math:`s'`), dones (:math:`d`) +| - values (:math:`V`), next values (:math:`V'`), advantages (:math:`A`), returns (:math:`R`) +| - log probabilities (:math:`logp`) +| - loss (:math:`L`) +| - reference motion dataset (:math:`M`), AMP replay buffer (:math:`B`) +| - AMP states (:math:`s_{_{AMP}}`), reference motion states (:math:`s_{_{AMP}}^{^M}`), AMP states from replay buffer (:math:`s_{_{AMP}}^{^B}`) + +**Learning algorithm** (:literal:`_update(...)`) + +| :literal:`compute_gae(...)` +| :blue:`def` :math:`\;f_{GAE} (r, d, V, V') \;\rightarrow\; R, A:` +| :math:`adv \leftarrow 0` +| :math:`A \leftarrow \text{zeros}(r)` +| :green:`# advantages computation` +| **FOR** each reverse iteration :math:`i` up to the number of rows in :math:`r` **DO** +| :math:`adv \leftarrow r_i - V_i \, +` :guilabel:`discount_factor` :math:`(V' \, +` :guilabel:`lambda` :math:`\neg d_i \; adv)` +| :math:`A_i \leftarrow adv` +| :green:`# returns computation` +| :math:`R \leftarrow A + V` +| :green:`# normalize advantages` +| :math:`A \leftarrow \dfrac{A - \bar{A}}{A_\sigma + 10^{-8}}` + +| :green:`# update dataset of reference motions` +| :math:`\text{collect reference motions of}` :guilabel:`amp_batch_size` :math:`\rightarrow\;` :math:`\text{append}(M)` +| :green:`# compute combined rewards` +| :math:`r_D \leftarrow -log(\text{max}( 1 - \hat{y}(D_\psi(s_{_{AMP}})), \, 10^{-4})) \qquad` with :math:`\; \hat{y}(x) = \dfrac{1}{1 + e^{-x}}` +| :math:`r' \leftarrow` :guilabel:`task_reward_weight` :math:`r \, +` :guilabel:`style_reward_weight` :guilabel:`discriminator_reward_scale` :math:`r_D` +| :green:`# compute returns and advantages` +| :math:`R, A \leftarrow f_{GAE}(r', d, V, V')` +| :green:`# sample mini-batches from memory` +| [[:math:`s, a, logp, V, R, A, s_{_{AMP}}`]] :math:`\leftarrow` states, actions, log_prob, values, returns, advantages, AMP states +| [[:math:`s_{_{AMP}}^{^M}`]] :math:`\leftarrow` AMP states from :math:`M` +| **IF** :math:`B` is not empty **THEN** +| [[:math:`s_{_{AMP}}^{^B}`]] :math:`\leftarrow` AMP states from :math:`B` +| **ELSE** +| [[:math:`s_{_{AMP}}^{^B}`]] :math:`\leftarrow` [[:math:`s_{_{AMP}}`]] +| :green:`# learning epochs` +| **FOR** each learning epoch up to :guilabel:`learning_epochs` **DO** +| :green:`# mini-batches loop` +| **FOR** each mini-batch [:math:`s, a, logp, V, R, A, s_{_{AMP}}, s_{_{AMP}}^{^B}, s_{_{AMP}}^{^M}`] up to :guilabel:`mini_batches` **DO** +| :math:`logp' \leftarrow \pi_\theta(s, a)` +| :green:`# compute entropy loss` +| **IF** entropy computation is enabled **THEN** +| :math:`{L}_{entropy} \leftarrow \, -` :guilabel:`entropy_loss_scale` :math:`\frac{1}{N} \sum_{i=1}^N \pi_{\theta_{entropy}}` +| **ELSE** +| :math:`{L}_{entropy} \leftarrow 0` +| :green:`# compute policy loss` +| :math:`ratio \leftarrow e^{logp' - logp}` +| :math:`L_{_{surrogate}} \leftarrow A \; ratio` +| :math:`L_{_{clipped\,surrogate}} \leftarrow A \; \text{clip}(ratio, 1 - c, 1 + c) \qquad` with :math:`c` as :guilabel:`ratio_clip` +| :math:`L^{clip}_{\pi_\theta} 
\leftarrow - \frac{1}{N} \sum_{i=1}^N \min(L_{_{surrogate}}, L_{_{clipped\,surrogate}})` | :green:`# compute value loss` | :math:`V_{_{predicted}} \leftarrow V_\phi(s)` | **IF** :guilabel:`clip_predicted_values` is enabled **THEN** | :math:`V_{_{predicted}} \leftarrow V + \text{clip}(V_{_{predicted}} - V, -c, c) \qquad` with :math:`c` as :guilabel:`value_clip` | :math:`L_{V_\phi} \leftarrow` :guilabel:`value_loss_scale` :math:`\frac{1}{N} \sum_{i=1}^N (R - V_{_{predicted}})^2` | :green:`# compute discriminator loss` | :math:`{logit}_{_{AMP}} \leftarrow D_\psi(s_{_{AMP}})` | :math:`{logit}_{_{AMP}}^{^B} \leftarrow D_\psi(s_{_{AMP}}^{^B})` | :math:`{logit}_{_{AMP}}^{^M} \leftarrow D_\psi(s_{_{AMP}}^{^M})` | :green:`# discriminator prediction loss` | :math:`L_{D_\psi} \leftarrow \dfrac{1}{2}(BCE({logit}_{_{AMP}}` ++ :math:`{logit}_{_{AMP}}^{^B}, \, 0) + BCE({logit}_{_{AMP}}^{^M}, \, 1))` | with :math:`\; BCE(x,y)=-\frac{1}{N} \sum_{i=1}^N [y \; log(\hat{y}) + (1-y) \, log(1-\hat{y})] \;` and :math:`\; \hat{y} = \dfrac{1}{1 + e^{-x}}` | :green:`# discriminator logit regularization` | :math:`L_{D_\psi} \leftarrow L_{D_\psi} +` :guilabel:`discriminator_logit_regularization_scale` :math:`\sum_{i=1}^N \text{flatten}(\psi_w[-1])^2` | :green:`# discriminator gradient penalty` | :math:`L_{D_\psi} \leftarrow L_{D_\psi} +` :guilabel:`discriminator_gradient_penalty_scale` :math:`\frac{1}{N} \sum_{i=1}^N \sum (\nabla_\psi {logit}_{_{AMP}}^{^M})^2` | :green:`# discriminator weight decay` | :math:`L_{D_\psi} \leftarrow L_{D_\psi} +` :guilabel:`discriminator_weight_decay_scale` :math:`\sum_{i=1}^N \text{flatten}(\psi_w)^2` | :green:`# optimization step` | reset :math:`\text{optimizer}_{\theta, \phi, \psi}` | :math:`\nabla_{\theta, \, \phi, \, \psi} (L^{clip}_{\pi_\theta} + {L}_{entropy} + L_{V_\phi} + L_{D_\psi})` | :math:`\text{clip}(\lVert \nabla_{\theta, \, \phi, \, \psi} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_{\theta, \phi, \psi}` | :green:`# update learning rate` | **IF** there is a :guilabel:`learning_rate_scheduler` **THEN** | step :math:`\text{scheduler}_{\theta, \phi, \psi} (\text{optimizer}_{\theta, \phi, \psi})` | :green:`# update AMP replay buffer` | :math:`s_{_{AMP}} \rightarrow\;` :math:`\text{append}(B)` + +Configuration and hyperparameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. py:data:: skrl.agents.torch.amp.amp.AMP_DEFAULT_CONFIG + +.. literalinclude:: ../../../skrl/agents/torch/amp/amp.py + :language: python + :lines: 18-67 + :linenos: + +Spaces and models +^^^^^^^^^^^^^^^^^ + +The implementation supports the following `Gym spaces `_ + +.. list-table:: + :header-rows: 1 + + * - Gym spaces + - .. centered:: Observation + - .. centered:: Action + * - Discrete + - .. centered:: :math:`\square` + - .. centered:: :math:`\square` + * - Box + - .. centered:: :math:`\blacksquare` + - .. centered:: :math:`\blacksquare` + * - Dict + - .. centered:: :math:`\square` + - .. centered:: :math:`\square` + +The implementation uses 1 stochastic (continuous) and 2 deterministic function approximators. These function approximators (models) must be collected in a dictionary and passed to the constructor of the class under the argument :literal:`models` + +.. 
list-table:: + :header-rows: 1 + + * - Notation + - Concept + - Key + - Type + * - :math:`\pi_\theta(s)` + - Policy + - :literal:`"policy"` + - :ref:`Gaussian ` + * - :math:`V_\phi(s)` + - Value + - :literal:`"value"` + - :ref:`Deterministic ` + * - :math:`D_\psi(s)` + - Discriminator + - :literal:`"discriminator"` + - :ref:`Deterministic ` + +API +^^^ + +.. autoclass:: skrl.agents.torch.amp.amp.AMP + :undoc-members: + :show-inheritance: + :private-members: _update + :members: + + .. automethod:: __init__ From 9cb5d0c68161f4ea288fe61565e8ea69183a9437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 24 Jul 2022 23:59:46 +0200 Subject: [PATCH 004/108] Update CHANGELOG with unreleased modifications --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fce6847f..196a69bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [0.8.0] - Unreleased +### Added +- AMP agent for physics-based character animation + ## [0.7.0] - 2022-07-11 ### Added - A2C agent From 7abf7836adcc4d5e2ebba240c7212cf159822ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 25 Jul 2022 12:41:25 +0200 Subject: [PATCH 005/108] List input and output shapes for each agent model in docs --- docs/source/modules/skrl.agents.a2c.rst | 6 ++++++ docs/source/modules/skrl.agents.amp.rst | 14 +++++++++++++- docs/source/modules/skrl.agents.cem.rst | 4 ++++ docs/source/modules/skrl.agents.ddpg.rst | 10 ++++++++++ docs/source/modules/skrl.agents.ddqn.rst | 6 ++++++ docs/source/modules/skrl.agents.dqn.rst | 6 ++++++ docs/source/modules/skrl.agents.ppo.rst | 6 ++++++ docs/source/modules/skrl.agents.q_learning.rst | 4 ++++ docs/source/modules/skrl.agents.sac.rst | 12 ++++++++++++ docs/source/modules/skrl.agents.sarsa.rst | 4 ++++ docs/source/modules/skrl.agents.td3.rst | 14 ++++++++++++++ docs/source/modules/skrl.agents.trpo.rst | 6 ++++++ 12 files changed, 91 insertions(+), 1 deletion(-) diff --git a/docs/source/modules/skrl.agents.a2c.rst b/docs/source/modules/skrl.agents.a2c.rst index 4898bd61..da63d444 100644 --- a/docs/source/modules/skrl.agents.a2c.rst +++ b/docs/source/modules/skrl.agents.a2c.rst @@ -110,14 +110,20 @@ The implementation uses 1 stochastic (discrete or continuous) and 1 deterministi - Concept - Key - Type + - Input shape + - Output shape * - :math:`\pi_\theta(s)` - Policy - :literal:`"policy"` - :ref:`Categorical ` / :ref:`Gaussian ` + - observation + - action * - :math:`V_\phi(s)` - Value - :literal:`"value"` - :ref:`Deterministic ` + - observation + - 1 API ^^^ diff --git a/docs/source/modules/skrl.agents.amp.rst b/docs/source/modules/skrl.agents.amp.rst index e8e7c228..2256f4d9 100644 --- a/docs/source/modules/skrl.agents.amp.rst +++ b/docs/source/modules/skrl.agents.amp.rst @@ -109,17 +109,21 @@ The implementation supports the following `Gym spaces ` + - observation + - action * - :math:`V_\phi(s)` - Value - :literal:`"value"` - :ref:`Deterministic ` - * - :math:`D_\psi(s)` + - observation + - 1 + * - :math:`D_\psi(s_{_{AMP}})` - Discriminator - :literal:`"discriminator"` - :ref:`Deterministic ` + - AMP observation + - 1 API ^^^ diff --git a/docs/source/modules/skrl.agents.cem.rst b/docs/source/modules/skrl.agents.cem.rst index cfe6fa29..0001678b 100644 --- a/docs/source/modules/skrl.agents.cem.rst +++ b/docs/source/modules/skrl.agents.cem.rst @@ -65,10 +65,14 @@ The implementation uses 1 discrete 
function approximator. This function approxim - Concept - Key - Type + - Input shape + - Output shape * - :math:`\pi(s)` - Policy - :literal:`"policy"` - :ref:`Categorical ` + - observation + - action API ^^^ diff --git a/docs/source/modules/skrl.agents.ddpg.rst b/docs/source/modules/skrl.agents.ddpg.rst index 96de2054..51dfc657 100644 --- a/docs/source/modules/skrl.agents.ddpg.rst +++ b/docs/source/modules/skrl.agents.ddpg.rst @@ -93,22 +93,32 @@ The implementation uses 4 deterministic function approximators. These function a - Concept - Key - Type + - Input shape + - Output shape * - :math:`\mu_\theta(s)` - Policy (actor) - :literal:`"policy"` - :ref:`Deterministic ` + - observation + - action * - :math:`\mu_{\theta_{target}}(s)` - Target policy - :literal:`"target_policy"` - :ref:`Deterministic ` + - observation + - action * - :math:`Q_\phi(s, a)` - Q-network (critic) - :literal:`"critic"` - :ref:`Deterministic ` + - observation + action + - 1 * - :math:`Q_{\phi_{target}}(s, a)` - Target Q-network - :literal:`"target_critic"` - :ref:`Deterministic ` + - observation + action + - 1 API ^^^ diff --git a/docs/source/modules/skrl.agents.ddqn.rst b/docs/source/modules/skrl.agents.ddqn.rst index 76b53ca1..afeddc73 100644 --- a/docs/source/modules/skrl.agents.ddqn.rst +++ b/docs/source/modules/skrl.agents.ddqn.rst @@ -72,14 +72,20 @@ The implementation uses 2 deterministic function approximators. These function a - Concept - Key - Type + - Input shape + - Output shape * - :math:`Q_\phi(s, a)` - Q-network - :literal:`"q_network"` - :ref:`Deterministic ` + - observation + - action * - :math:`Q_{\phi_{target}}(s, a)` - Target Q-network - :literal:`"target_q_network"` - :ref:`Deterministic ` + - observation + - action API ^^^ diff --git a/docs/source/modules/skrl.agents.dqn.rst b/docs/source/modules/skrl.agents.dqn.rst index f1d01871..77eb0e89 100644 --- a/docs/source/modules/skrl.agents.dqn.rst +++ b/docs/source/modules/skrl.agents.dqn.rst @@ -72,14 +72,20 @@ The implementation uses 2 deterministic function approximators. These function a - Concept - Key - Type + - Input shape + - Output shape * - :math:`Q_\phi(s, a)` - Q-network - :literal:`"q_network"` - :ref:`Deterministic ` + - observation + - action * - :math:`Q_{\phi_{target}}(s, a)` - Target Q-network - :literal:`"target_q_network"` - :ref:`Deterministic ` + - observation + - action API ^^^ diff --git a/docs/source/modules/skrl.agents.ppo.rst b/docs/source/modules/skrl.agents.ppo.rst index f53bb403..5260bddc 100644 --- a/docs/source/modules/skrl.agents.ppo.rst +++ b/docs/source/modules/skrl.agents.ppo.rst @@ -125,14 +125,20 @@ The implementation uses 1 stochastic (discrete or continuous) and 1 deterministi - Concept - Key - Type + - Input shape + - Output shape * - :math:`\pi_\theta(s)` - Policy - :literal:`"policy"` - :ref:`Categorical ` / :ref:`Gaussian ` + - observation + - action * - :math:`V_\phi(s)` - Value - :literal:`"value"` - :ref:`Deterministic ` + - observation + - 1 API ^^^ diff --git a/docs/source/modules/skrl.agents.q_learning.rst b/docs/source/modules/skrl.agents.q_learning.rst index f4cc8a00..b5dee107 100644 --- a/docs/source/modules/skrl.agents.q_learning.rst +++ b/docs/source/modules/skrl.agents.q_learning.rst @@ -63,10 +63,14 @@ The implementation uses 1 table. 
This table (model) must be collected in a dicti - Concept - Key - Type + - Input shape + - Output shape * - :math:`\pi_{Q[s,a]}(s)` - Policy (:math:`\epsilon`-greedy) - :literal:`"policy"` - :ref:`Tabular ` + - observation + - action API ^^^ diff --git a/docs/source/modules/skrl.agents.sac.rst b/docs/source/modules/skrl.agents.sac.rst index 5fe11b9e..599c9ff0 100644 --- a/docs/source/modules/skrl.agents.sac.rst +++ b/docs/source/modules/skrl.agents.sac.rst @@ -100,26 +100,38 @@ The implementation uses 1 stochastic and 4 deterministic function approximators. - Concept - Key - Type + - Input shape + - Output shape * - :math:`\pi_\theta(s)` - Policy (actor) - :literal:`"policy"` - :ref:`Gaussian ` + - observation + - action * - :math:`Q_{\phi 1}(s, a)` - Q1-network (critic 1) - :literal:`"critic_1"` - :ref:`Deterministic ` + - observation + action + - 1 * - :math:`Q_{\phi 2}(s, a)` - Q2-network (critic 2) - :literal:`"critic_2"` - :ref:`Deterministic ` + - observation + action + - 1 * - :math:`Q_{{\phi 1}_{target}}(s, a)` - Target Q1-network - :literal:`"target_critic_1"` - :ref:`Deterministic ` + - observation + action + - 1 * - :math:`Q_{{\phi 2}_{target}}(s, a)` - Target Q2-network - :literal:`"target_critic_2"` - :ref:`Deterministic ` + - observation + action + - 1 API ^^^ diff --git a/docs/source/modules/skrl.agents.sarsa.rst b/docs/source/modules/skrl.agents.sarsa.rst index 7688e26a..1c5fba3f 100644 --- a/docs/source/modules/skrl.agents.sarsa.rst +++ b/docs/source/modules/skrl.agents.sarsa.rst @@ -62,10 +62,14 @@ The implementation uses 1 table. This table (model) must be collected in a dicti - Concept - Key - Type + - Input shape + - Output shape * - :math:`\pi_{Q[s,a]}(s)` - Policy (:math:`\epsilon`-greedy) - :literal:`"policy"` - :ref:`Tabular ` + - observation + - action API ^^^ diff --git a/docs/source/modules/skrl.agents.td3.rst b/docs/source/modules/skrl.agents.td3.rst index f7ab9423..2b8108fb 100644 --- a/docs/source/modules/skrl.agents.td3.rst +++ b/docs/source/modules/skrl.agents.td3.rst @@ -103,30 +103,44 @@ The implementation uses 6 deterministic function approximators. These function a - Concept - Key - Type + - Input shape + - Output shape * - :math:`\mu_\theta(s)` - Policy (actor) - :literal:`"policy"` - :ref:`Deterministic ` + - observation + - action * - :math:`\mu_{\theta_{target}}(s)` - Target policy - :literal:`"target_policy"` - :ref:`Deterministic ` + - observation + - action * - :math:`Q_{\phi 1}(s, a)` - Q1-network (critic 1) - :literal:`"critic_1"` - :ref:`Deterministic ` + - observation + action + - 1 * - :math:`Q_{\phi 2}(s, a)` - Q2-network (critic 2) - :literal:`"critic_2"` - :ref:`Deterministic ` + - observation + action + - 1 * - :math:`Q_{{\phi 1}_{target}}(s, a)` - Target Q1-network - :literal:`"target_critic_1"` - :ref:`Deterministic ` + - observation + action + - 1 * - :math:`Q_{{\phi 2}_{target}}(s, a)` - Target Q2-network - :literal:`"target_critic_2"` - :ref:`Deterministic ` + - observation + action + - 1 API ^^^ diff --git a/docs/source/modules/skrl.agents.trpo.rst b/docs/source/modules/skrl.agents.trpo.rst index 8a60ae8a..d4e87b78 100644 --- a/docs/source/modules/skrl.agents.trpo.rst +++ b/docs/source/modules/skrl.agents.trpo.rst @@ -163,14 +163,20 @@ The implementation uses 1 stochastic and 1 deterministic function approximator. 
- Concept - Key - Type + - Input shape + - Output shape * - :math:`\pi_\theta(s)` - Policy - :literal:`"policy"` - :ref:`Gaussian ` + - observation + - action * - :math:`V_\phi(s)` - Value - :literal:`"value"` - :ref:`Deterministic ` + - observation + - 1 API ^^^ From adc49551efc68878af7b76411ca51632cf84a512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 25 Jul 2022 13:43:11 +0200 Subject: [PATCH 006/108] Add AMP example to docs --- docs/source/examples/isaacgym/amp_humanoid.py | 143 ++++++++++++++++++ docs/source/intro/examples.rst | 10 ++ 2 files changed, 153 insertions(+) create mode 100644 docs/source/examples/isaacgym/amp_humanoid.py diff --git a/docs/source/examples/isaacgym/amp_humanoid.py b/docs/source/examples/isaacgym/amp_humanoid.py new file mode 100644 index 00000000..6fe006d9 --- /dev/null +++ b/docs/source/examples/isaacgym/amp_humanoid.py @@ -0,0 +1,143 @@ +import isaacgym + +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.amp import AMP, AMP_DEFAULT_CONFIG +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env +from skrl.envs.torch import load_isaacgym_env_preview4 +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the models (stochastic and deterministic models) for the agent using helper classes. +# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +# - Discriminator: differentiates between policy-generated behaviors and behaviors from the motion dataset +class Policy(GaussianModel): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + super().__init__(observation_space, action_space, device, clip_actions, + clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 1024), + nn.ReLU(), + nn.Linear(1024, 512), + nn.ReLU(), + nn.Linear(512, self.num_actions)) + + # set a fixed log standard deviation for the policy + self.log_std_parameter = nn.Parameter(torch.full((self.num_actions,), fill_value=-2.9), requires_grad=False) + + def compute(self, states, taken_actions): + return torch.tanh(self.net(states)), self.log_std_parameter + +class Value(DeterministicModel): + def __init__(self, observation_space, action_space, device, clip_actions=False): + super().__init__(observation_space, action_space, device, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 1024), + nn.ReLU(), + nn.Linear(1024, 512), + nn.ReLU(), + nn.Linear(512, 1)) + + def compute(self, states, taken_actions): + return self.net(states) + +class Discriminator(DeterministicModel): + def __init__(self, observation_space, action_space, device, clip_actions=False): + super().__init__(observation_space, action_space, device, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 1024), + nn.ReLU(), + nn.Linear(1024, 512), + nn.ReLU(), + nn.Linear(512, 1)) + + def compute(self, states, taken_actions): + return self.net(states) + + +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="HumanoidAMP") # preview 3 and 4 use the same loader +env 
= wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# AMP requires 3 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.amp.html#spaces-and-models +models_amp = {"policy": Policy(env.observation_space, env.action_space, device), + "value": Value(env.observation_space, env.action_space, device), + "discriminator": Discriminator(env.amp_observation_space, env.action_space, device)} + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.amp.html#configuration-and-hyperparameters +cfg_amp = AMP_DEFAULT_CONFIG.copy() +cfg_amp["rollouts"] = 16 +cfg_amp["learning_epochs"] = 6 +cfg_amp["mini_batches"] = 2 # 16 * 4096 / 32768 +cfg_amp["discount_factor"] = 0.99 +cfg_amp["lambda"] = 0.95 +cfg_amp["learning_rate"] = 5e-5 +cfg_amp["random_timesteps"] = 0 +cfg_amp["learning_starts"] = 0 +cfg_amp["grad_norm_clip"] = 0.0 +cfg_amp["ratio_clip"] = 0.2 +cfg_amp["value_clip"] = 0.2 +cfg_amp["clip_predicted_values"] = False +cfg_amp["entropy_loss_scale"] = 0.0 +cfg_amp["value_loss_scale"] = 2.5 +cfg_amp["discriminator_loss_scale"] = 5.0 +cfg_amp["amp_batch_size"] = 512 +cfg_amp["task_reward_weight"] = 0.0 +cfg_amp["style_reward_weight"] = 1.0 +cfg_amp["discriminator_reward_scale"] = 2 +cfg_amp["discriminator_logit_regularization_scale"] = 0.05 +cfg_amp["discriminator_gradient_penalty_scale"] = 5 +cfg_amp["discriminator_weight_decay_scale"] = 0.0001 +cfg_amp["state_preprocessor"] = RunningStandardScaler +cfg_amp["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_amp["value_preprocessor"] = RunningStandardScaler +cfg_amp["value_preprocessor_kwargs"] = {"size": 1, "device": device} +cfg_amp["amp_state_preprocessor"] = RunningStandardScaler +cfg_amp["amp_state_preprocessor_kwargs"] = {"size": env.amp_observation_space, "device": device} +# logging to TensorBoard and write checkpoints each 16 and 4000 timesteps respectively +cfg_amp["experiment"]["write_interval"] = 160 +cfg_amp["experiment"]["checkpoint_interval"] = 4000 + +agent = AMP(models=models_amp, + memory=memory, + cfg=cfg_amp, + observation_space=env.observation_space, + action_space=env.action_space, + device=device, + amp_observation_space=env.amp_observation_space, + motion_dataset=RandomMemory(memory_size=200000, device=device), + reply_buffer=RandomMemory(memory_size=1000000, device=device), + collect_reference_motions=lambda num_samples: env.fetch_amp_obs_demo(num_samples), + collect_observation=lambda: env.reset_done()[0]["obs"]) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 80000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index bf3cd29b..cf8f8f2e 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -273,6 +273,7 @@ The following components or practices are exemplified (highlighted): - Set a random seed for reproducibility: **Cartpole** - Set a learning rate scheduler: **FrankaCabinet**, **Humanoid** - Define a reward shaping function: **Quadcopter**, **ShadowHand**, **Trifinger** + - 
Access to environment-specific properties and methods: **Humanoid (AMP)** - Load a checkpoint during evaluation: **Cartpole** The PPO agent configuration is mapped, as far as possible, from the rl_games' A2C-PPO `configuration for Isaac Gym preview environments `_. The following list shows the mapping between the two configurations @@ -389,6 +390,15 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :linenos: :emphasize-lines: 10, 97-98 + .. tab:: Humanoid (AMP) + + View the raw code: `ppo_humanoid.py `_ + + .. literalinclude:: ../examples/isaacgym/amp_humanoid.py + :language: python + :linenos: + :emphasize-lines: 86, 120, 131, 134-135 + .. tab:: Ingenuity View the raw code: `ppo_ingenuity.py `_ From 851f90770f3f4bd0c8e8212cc0009f1bc5932f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 25 Jul 2022 22:15:18 +0200 Subject: [PATCH 007/108] Use a normal distribution in the gaussian model --- skrl/models/torch/gaussian.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index 6b1e00c1..a5913d32 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -3,7 +3,7 @@ import gym import torch -from torch.distributions import MultivariateNormal +from torch.distributions import Normal from . import Model @@ -17,7 +17,7 @@ def __init__(self, clip_log_std: bool = True, min_log_std: float = -20, max_log_std: float = 2) -> None: - """Diagonal Gaussian model (stochastic model) + """Gaussian model (stochastic model) :param observation_space: Observation/state space or shape (default: None). If it is not None, the num_observations property will contain the size of that space @@ -90,10 +90,7 @@ def act(self, self._num_samples = actions_mean.shape[0] # distribution - covariance = torch.diag(log_std.exp() * log_std.exp()) - self._distribution = MultivariateNormal(actions_mean, scale_tril=covariance) - # self._distribution.loc = actions_mean - # self._distribution._unbroadcasted_scale_tril = covariance + self._distribution = Normal(actions_mean, log_std.exp()) # sample using the reparameterization trick actions = self._distribution.rsample() @@ -107,8 +104,6 @@ def act(self, # log of the probability density function log_prob = self._distribution.log_prob(actions if taken_actions is None else taken_actions) - if log_prob.dim() != actions.dim(): - log_prob = log_prob.unsqueeze(-1) if inference: return actions.detach(), log_prob.detach(), actions_mean.detach() @@ -132,10 +127,10 @@ def get_log_std(self) -> torch.Tensor: """ return self._log_std.repeat(self._num_samples, 1) - def distribution(self) -> torch.distributions.MultivariateNormal: + def distribution(self) -> torch.distributions.Normal: """Get the current distribution of the model :return: Distribution of the model - :rtype: torch.distributions.MultivariateNormal + :rtype: torch.distributions.Normal """ return self._distribution From 62ba65633a87eefee7e863c57467129a6198ac59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 25 Jul 2022 23:34:23 +0200 Subject: [PATCH 008/108] Define the multivariate gaussian model in a separate file --- skrl/models/torch/__init__.py | 2 +- skrl/models/torch/multivariate_gaussian.py | 139 +++++++++++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 skrl/models/torch/multivariate_gaussian.py diff --git a/skrl/models/torch/__init__.py b/skrl/models/torch/__init__.py index 
c3b94db1..de02edaa 100644 --- a/skrl/models/torch/__init__.py +++ b/skrl/models/torch/__init__.py @@ -1,7 +1,7 @@ from .base import Model from .tabular import TabularModel - from .gaussian import GaussianModel from .categorical import CategoricalModel from .deterministic import DeterministicModel +from .multivariate_gaussian import MultivariateGaussianModel diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py new file mode 100644 index 00000000..1f61f275 --- /dev/null +++ b/skrl/models/torch/multivariate_gaussian.py @@ -0,0 +1,139 @@ +from typing import Union, Tuple + +import gym + +import torch +from torch.distributions import MultivariateNormal + +from . import Model + + +class MultivariateGaussianModel(Model): + def __init__(self, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", + clip_actions: bool = False, + clip_log_std: bool = True, + min_log_std: float = -20, + max_log_std: float = 2) -> None: + """Multivariate Gaussian model (stochastic model) + + :param observation_space: Observation/state space or shape (default: None). + If it is not None, the num_observations property will contain the size of that space + :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :param action_space: Action space or shape (default: None). + If it is not None, the num_actions property will contain the size of that space + :type action_space: int, tuple or list of integers, gym.Space or None, optional + :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :type device: str or torch.device, optional + :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: False) + :type clip_actions: bool, optional + :param clip_log_std: Flag to indicate whether the log standard deviations should be clipped (default: True) + :type clip_log_std: bool, optional + :param min_log_std: Minimum value of the log standard deviation if clip_log_std is True (default: -20) + :type min_log_std: float, optional + :param max_log_std: Maximum value of the log standard deviation if clip_log_std is True (default: 2) + :type max_log_std: float, optional + """ + super(MultivariateGaussianModel, self).__init__(observation_space, action_space, device) + + self.clip_actions = clip_actions and issubclass(type(self.action_space), gym.Space) + + if self.clip_actions: + self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) + self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device) + + # backward compatibility: torch < 1.9 clamp method does not support tensors + self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) + + self.clip_log_std = clip_log_std + self.log_std_min = min_log_std + self.log_std_max = max_log_std + + self._log_std = None + self._num_samples = None + self._distribution = None + + def act(self, + states: torch.Tensor, + taken_actions: Union[torch.Tensor, None] = None, + inference=False) -> Tuple[torch.Tensor]: + """Act stochastically in response to the state of the environment + + :param states: Observation/state of the environment used to make the decision + :type states: torch.Tensor + :param taken_actions: Actions taken by a policy to the given states (default: None). + The use of these actions only makes sense in critical models, e.g. 
+ :type taken_actions: torch.Tensor or None, optional + :param inference: Flag to indicate whether the model is making inference (default: False). + If True, the returned tensors will be detached from the current graph + :type inference: bool, optional + + :return: Action to be taken by the agent given the state of the environment. + The tuple's components are the actions, the log of the probability density function and mean actions + :rtype: tuple of torch.Tensor + """ + # map from states/observations to mean actions and log standard deviations + if self._instantiator_net is None: + actions_mean, log_std = self.compute(states.to(self.device), + taken_actions.to(self.device) if taken_actions is not None else taken_actions) + else: + actions_mean, log_std = self._get_instantiator_output(states.to(self.device), \ + taken_actions.to(self.device) if taken_actions is not None else taken_actions) + + # clamp log standard deviations + if self.clip_log_std: + log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max) + + self._log_std = log_std + self._num_samples = actions_mean.shape[0] + + # distribution + covariance = torch.diag(log_std.exp() * log_std.exp()) + self._distribution = MultivariateNormal(actions_mean, scale_tril=covariance) + + # sample using the reparameterization trick + actions = self._distribution.rsample() + + # clip actions + if self.clip_actions: + if self._backward_compatibility: + actions = torch.max(torch.min(actions, self.clip_actions_max), self.clip_actions_min) + else: + actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) + + # log of the probability density function + log_prob = self._distribution.log_prob(actions if taken_actions is None else taken_actions) + if log_prob.dim() != actions.dim(): + log_prob = log_prob.unsqueeze(-1) + + if inference: + return actions.detach(), log_prob.detach(), actions_mean.detach() + return actions, log_prob, actions_mean + + def get_entropy(self) -> torch.Tensor: + """Compute and return the entropy of the model + + :return: Entropy of the model + :rtype: torch.Tensor + """ + if self._distribution is None: + return torch.tensor(0.0, device=self.device) + return self._distribution.entropy().to(self.device) + + def get_log_std(self) -> torch.Tensor: + """Return the log standard deviation of the model + + :return: Log standard deviation of the model + :rtype: torch.Tensor + """ + return self._log_std.repeat(self._num_samples, 1) + + def distribution(self) -> torch.distributions.MultivariateNormal: + """Get the current distribution of the model + + :return: Distribution of the model + :rtype: torch.distributions.MultivariateNormal + """ + return self._distribution From afdc25fc1be970fd283fc8a4ca78312d24f3eb3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 25 Jul 2022 23:40:17 +0200 Subject: [PATCH 009/108] Add reduction methods for returning the log probability density function --- skrl/models/torch/gaussian.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index a5913d32..24b117f0 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -16,7 +16,8 @@ def __init__(self, clip_actions: bool = False, clip_log_std: bool = True, min_log_std: float = -20, - max_log_std: float = 2) -> None: + max_log_std: float = 2, + reduction: str = "sum") -> None: """Gaussian model (stochastic model) :param observation_space: Observation/state space or shape 
(default: None). @@ -35,6 +36,12 @@ def __init__(self, :type min_log_std: float, optional :param max_log_std: Maximum value of the log standard deviation if clip_log_std is True (default: 2) :type max_log_std: float, optional + :param reduction: Reduction method for returning the log probability density function: (default: "sum"). + Supported values are "mean", "sum", "prod" and "none". If "none", the log probability density + function is returned as a tensor of shape (num_samples, num_actions) instead of (num_samples, 1) + :type reduction: str, optional + + :raises ValueError: If the reduction method is not valid """ super(GaussianModel, self).__init__(observation_space, action_space, device) @@ -55,6 +62,11 @@ def __init__(self, self._num_samples = None self._distribution = None + if reduction not in ["mean", "sum", "prod", "none"]: + raise ValueError("reduction must be one of 'mean', 'sum', 'prod' or 'none'") + self._reduction = torch.mean if reduction == "mean" else torch.sum if reduction == "sum" \ + else torch.prod if reduction == "prod" else None + def act(self, states: torch.Tensor, taken_actions: Union[torch.Tensor, None] = None, @@ -104,6 +116,10 @@ def act(self, # log of the probability density function log_prob = self._distribution.log_prob(actions if taken_actions is None else taken_actions) + if self._reduction is not None: + log_prob = self._reduction(log_prob, dim=-1) + if log_prob.dim() != actions.dim(): + log_prob = log_prob.unsqueeze(-1) if inference: return actions.detach(), log_prob.detach(), actions_mean.detach() From 2dd0fb96d6a3d56c05aed2924a6cc08e96a3e041 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 26 Jul 2022 18:29:33 +0200 Subject: [PATCH 010/108] Show the changelog in docs --- docs/source/intro/installation.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/intro/installation.rst b/docs/source/intro/installation.rst index 5e0dacec..1663cccb 100644 --- a/docs/source/intro/installation.rst +++ b/docs/source/intro/installation.rst @@ -77,3 +77,9 @@ Known issues .. code-block:: text AttributeError: 'Adam' object has no attribute '_warned_capturable_if_run_uncaptured' + +Changelog +--------- + +.. 
literalinclude:: ../../../CHANGELOG.md + :language: markdown From ea42867f0c58d35751f334c4a1fdf244a1db47b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 26 Jul 2022 22:39:27 +0200 Subject: [PATCH 011/108] Add multivariate gaussian model schema --- docs/source/_static/imgs/model_gaussian.svg | 2 +- docs/source/_static/imgs/model_multivariate_gaussian.svg | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) mode change 100644 => 100755 docs/source/_static/imgs/model_gaussian.svg create mode 100644 docs/source/_static/imgs/model_multivariate_gaussian.svg diff --git a/docs/source/_static/imgs/model_gaussian.svg b/docs/source/_static/imgs/model_gaussian.svg old mode 100644 new mode 100755 index 19bd5771..92fa89ca --- a/docs/source/_static/imgs/model_gaussian.svg +++ b/docs/source/_static/imgs/model_gaussian.svg @@ -1 +1 @@ -inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)multivariategaussian distribution𝒩(𝜇,𝛴)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1mean actions (𝒂𝒕+𝟏)paramclip_log_stdclip_actions \ No newline at end of file +inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)gaussiandistribution𝒩(𝜇,𝜎)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1mean actions (𝒂𝒕+𝟏)paramclip_log_stdclip_actionsreduction \ No newline at end of file diff --git a/docs/source/_static/imgs/model_multivariate_gaussian.svg b/docs/source/_static/imgs/model_multivariate_gaussian.svg new file mode 100644 index 00000000..19bd5771 --- /dev/null +++ b/docs/source/_static/imgs/model_multivariate_gaussian.svg @@ -0,0 +1 @@ +inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)multivariategaussian distribution𝒩(𝜇,𝛴)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1mean actions (𝒂𝒕+𝟏)paramclip_log_stdclip_actions \ No newline at end of file From 91c71eb47039ce0bc18a7abd9503c8ef997143be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 26 Jul 2022 22:45:31 +0200 Subject: [PATCH 012/108] Add multivariate gaussian model to docs --- docs/source/index.rst | 2 + .../skrl.models.multivariate_gaussian.rst | 43 +++++++ docs/source/snippets/gaussian_model.py | 14 ++- .../snippets/multivariate_gaussian_model.py | 116 ++++++++++++++++++ 4 files changed, 169 insertions(+), 6 deletions(-) create mode 100644 docs/source/modules/skrl.models.multivariate_gaussian.rst create mode 100644 docs/source/snippets/multivariate_gaussian_model.py diff --git a/docs/source/index.rst b/docs/source/index.rst index 69dc9550..e26408f7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -128,6 +128,7 @@ Models * :doc:`Tabular model ` (discrete domain) * :doc:`Categorical model ` (discrete domain) * :doc:`Gaussian model ` (continuous domain) + * :doc:`Multivariate Gaussian model ` (continuous domain) * :doc:`Deterministic model ` (continuous domain) .. toctree:: @@ -139,6 +140,7 @@ Models modules/skrl.models.tabular modules/skrl.models.categorical modules/skrl.models.gaussian + modules/skrl.models.multivariate_gaussian modules/skrl.models.deterministic Trainers diff --git a/docs/source/modules/skrl.models.multivariate_gaussian.rst b/docs/source/modules/skrl.models.multivariate_gaussian.rst new file mode 100644 index 00000000..3ad21398 --- /dev/null +++ b/docs/source/modules/skrl.models.multivariate_gaussian.rst @@ -0,0 +1,43 @@ +.. 
_models_multivariate_gaussian: + +Multivariate Gaussian model +=========================== + +Concept +^^^^^^^ + +.. image:: ../_static/imgs/model_multivariate_gaussian.svg + :width: 100% + :align: center + :alt: Multivariate Gaussian model + +Basic usage +^^^^^^^^^^^ + +.. tabs:: + + .. tab:: Multi-Layer Perceptron (MLP) + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-mlp] + :end-before: [end-mlp] + + .. tab:: Convolutional Neural Network (CNN) + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-cnn] + :end-before: [end-cnn] + +API +^^^ + +.. autoclass:: skrl.models.torch.multivariate_gaussian.MultivariateGaussianModel + :show-inheritance: + :members: + + .. automethod:: __init__ + .. automethod:: compute diff --git a/docs/source/snippets/gaussian_model.py b/docs/source/snippets/gaussian_model.py index 6e7f3f8e..0b1d5f85 100644 --- a/docs/source/snippets/gaussian_model.py +++ b/docs/source/snippets/gaussian_model.py @@ -18,9 +18,9 @@ class DummyEnv: # define the model class MLP(GaussianModel): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + clip_log_std, min_log_std, max_log_std, reduction) self.linear_layer_1 = nn.Linear(self.num_observations, 128) self.linear_layer_2 = nn.Linear(128, 64) @@ -42,7 +42,8 @@ def compute(self, states, taken_actions): clip_actions=True, clip_log_std=True, min_log_std=-20, - max_log_std=2) + max_log_std=2, + reduction="sum") # [end-mlp] import torch @@ -72,9 +73,9 @@ class DummyEnv: # define the model class CNN(GaussianModel): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + clip_log_std, min_log_std, max_log_std, reduction) self.net = nn.Sequential(nn.Conv2d(1, 64, kernel_size=4, stride=2), nn.ReLU(), @@ -107,7 +108,8 @@ def compute(self, states, taken_actions): clip_actions=True, clip_log_std=True, min_log_std=-20, - max_log_std=2) + max_log_std=2, + reduction="sum") # [end-cnn] import torch diff --git a/docs/source/snippets/multivariate_gaussian_model.py b/docs/source/snippets/multivariate_gaussian_model.py new file mode 100644 index 00000000..69fbad38 --- /dev/null +++ b/docs/source/snippets/multivariate_gaussian_model.py @@ -0,0 +1,116 @@ +import gym + +class DummyEnv: + observation_space = gym.spaces.Box(low=-1, high=1, shape=(5,)) + action_space = gym.spaces.Box(low=-1, high=1, shape=(3,)) + device = "cuda:0" + +env = DummyEnv() + +# [start-mlp] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import MultivariateGaussianModel + + +# define the model +class MLP(MultivariateGaussianModel): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + super().__init__(observation_space, action_space, device, clip_actions, + clip_log_std, min_log_std, max_log_std) + + self.linear_layer_1 = nn.Linear(self.num_observations, 128) + self.linear_layer_2 = 
nn.Linear(128, 64) + self.linear_layer_3 = nn.Linear(64, 32) + self.mean_action_layer = nn.Linear(32, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions): + x = F.relu(self.linear_layer_1(states)) + x = F.relu(self.linear_layer_2(x)) + x = F.relu(self.linear_layer_3(x)) + return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter + +# instantiate the model (assumes there is a wrapped environment: env) +policy = MLP(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2) +# [end-mlp] + +import torch +policy.to(env.device) +actions = policy.act(torch.randn(10, 5, device=env.device), torch.randn(10, 3, device=env.device)) +assert actions[0].shape == torch.Size([10, env.action_space.shape[0]]) + +# ============================================================================= + +import gym + +class DummyEnv: + observation_space = gym.spaces.Box(low=0, high=255, shape=(256, 256, 1)) + action_space = gym.spaces.Box(low=-1, high=1, shape=(2,)) + device = "cuda:0" + +env = DummyEnv() + +# [start-cnn] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import MultivariateGaussianModel + + +# define the model +class CNN(MultivariateGaussianModel): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + super().__init__(observation_space, action_space, device, clip_actions, + clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Conv2d(1, 64, kernel_size=4, stride=2), + nn.ReLU(), + nn.Conv2d(64, 32, kernel_size=4, stride=2), + nn.ReLU(), + nn.Conv2d(32, 16, kernel_size=2, stride=2), + nn.ReLU(), + nn.Conv2d(16, 8, kernel_size=2, stride=2), + nn.ReLU(), + nn.Flatten(), + nn.Linear(1800, 256), + nn.ReLU(), + nn.Linear(256, 16), + nn.Tanh(), + nn.Linear(16, 32), + nn.Tanh(), + nn.Linear(32, self.num_actions)) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions): + # permute (samples, width, height, channels) -> (samples, channels, width, height) + return self.net(states.permute(0, 3, 1, 2)), self.log_std_parameter + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = CNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2) +# [end-cnn] + +import torch +policy.to(env.device) +actions = policy.act(torch.randn(10, 256, 256, 1, device=env.device), torch.randn(10, 2, device=env.device)) +assert actions[0].shape == torch.Size([10, env.action_space.shape[0]]) From eb46aaeb04c5efb048e52793bf3d20bc752b0cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 26 Jul 2022 23:16:50 +0200 Subject: [PATCH 013/108] Define discriminator batch size in agent config --- skrl/agents/torch/amp/amp.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index e2befc77..1b1df03d 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -24,7 +24,6 @@ "lambda": 0.95, # TD(lambda) coefficient (lam) for computing returns and advantages "learning_rate": 5e-5, # learning rate - "discriminator_learning_rate": 5e-5, # 
discriminator learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. {"step_size": 1e-3}) @@ -50,6 +49,7 @@ "amp_batch_size": 512, # batch size for updating the reference motion dataset "task_reward_weight": 0.0, # task-reward weight (wG) "style_reward_weight": 1.0, # style-reward weight (wS) + "discriminator_batch_size": 0, # batch size for computing the discriminator loss (all samples if 0) "discriminator_reward_scale": 2, # discriminator reward scaling factor "discriminator_logit_regularization_scale": 0.05, # logit regularization scale factor for the discriminator loss "discriminator_gradient_penalty_scale": 5, # gradient penalty scaling factor for the discriminator loss @@ -154,7 +154,6 @@ def __init__(self, self._discriminator_loss_scale = self.cfg["discriminator_loss_scale"] self._learning_rate = self.cfg["learning_rate"] - self._discriminator_learning_rate = self.cfg["discriminator_learning_rate"] self._learning_rate_scheduler = self.cfg["learning_rate_scheduler"] self._state_preprocessor = self.cfg["state_preprocessor"] @@ -171,6 +170,7 @@ def __init__(self, self._task_reward_weight = self.cfg["task_reward_weight"] self._style_reward_weight = self.cfg["style_reward_weight"] + self._discriminator_batch_size = self.cfg["discriminator_batch_size"] self._discriminator_reward_scale = self.cfg["discriminator_reward_scale"] self._discriminator_logit_regularization_scale = self.cfg["discriminator_logit_regularization_scale"] self._discriminator_gradient_penalty_scale = self.cfg["discriminator_gradient_penalty_scale"] @@ -208,7 +208,7 @@ def init(self) -> None: self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) - self.memory.create_tensor(name="log_prob", size=self.action_space, dtype=torch.float32) + self.memory.create_tensor(name="log_prob", size=1, dtype=torch.float32) self.memory.create_tensor(name="values", size=1, dtype=torch.float32) self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) @@ -477,12 +477,20 @@ def compute_gae(rewards: torch.Tensor, value_loss = self._value_loss_scale * F.mse_loss(sampled_returns, predicted_values) # compute discriminator loss - amp_logits, _, _ = self.discriminator.act(states=self._amp_state_preprocessor(sampled_amp_states[0:4096], train=True)) - - amp_replay_logits, _, _ = self.discriminator.act(states=self._amp_state_preprocessor(sampled_replay_batches[batch_index][0][0:4096], train=True)) + if self._discriminator_batch_size: + sampled_amp_states = self._amp_state_preprocessor(sampled_amp_states[0:self._discriminator_batch_size], train=True) + sampled_amp_replay_states = self._amp_state_preprocessor( + sampled_replay_batches[batch_index][0][0:self._discriminator_batch_size], train=True) + sampled_amp_motion_states = self._amp_state_preprocessor( + sampled_motion_batches[batch_index][0][0:self._discriminator_batch_size], train=True) + else: + sampled_amp_states = self._amp_state_preprocessor(sampled_amp_states, train=True) + sampled_amp_replay_states = self._amp_state_preprocessor(sampled_replay_batches[batch_index][0], train=True) + sampled_amp_motion_states = self._amp_state_preprocessor(sampled_motion_batches[batch_index][0], train=True) - 
sampled_amp_motion_states = self._amp_state_preprocessor(sampled_motion_batches[batch_index][0][0:4096], train=True) sampled_amp_motion_states.requires_grad_(True) + amp_logits, _, _ = self.discriminator.act(states=sampled_amp_states) + amp_replay_logits, _, _ = self.discriminator.act(states=sampled_amp_replay_states) amp_motion_logits, _, _ = self.discriminator.act(states=sampled_amp_motion_states) amp_cat_logits = torch.cat([amp_logits, amp_replay_logits], dim=0) @@ -538,7 +546,7 @@ def compute_gae(rewards: torch.Tensor, self.scheduler.step() # update AMP repaly buffer - self.reply_buffer.add_samples(states=amp_states) + self.reply_buffer.add_samples(states=amp_states.view(-1, amp_states.shape[-1])) # record data self.track_data("Loss / Policy loss", cumulative_policy_loss / (self._learning_epochs * self._mini_batches)) From f34dbfe619ba83d8e6151b0fbbdd0613d5ac3141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 26 Jul 2022 23:30:59 +0200 Subject: [PATCH 014/108] Update AMP agent in docs --- docs/source/examples/isaacgym/amp_humanoid.py | 5 +++-- docs/source/intro/examples.rst | 2 +- docs/source/modules/skrl.agents.amp.rst | 10 +++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/source/examples/isaacgym/amp_humanoid.py b/docs/source/examples/isaacgym/amp_humanoid.py index 6fe006d9..8b3c4f05 100644 --- a/docs/source/examples/isaacgym/amp_humanoid.py +++ b/docs/source/examples/isaacgym/amp_humanoid.py @@ -24,9 +24,9 @@ # - Discriminator: differentiate between police-generated behaviors and behaviors from the motion dataset class Policy(GaussianModel): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + clip_log_std, min_log_std, max_log_std, reduction) self.net = nn.Sequential(nn.Linear(self.num_observations, 1024), nn.ReLU(), @@ -108,6 +108,7 @@ def compute(self, states, taken_actions): cfg_amp["amp_batch_size"] = 512 cfg_amp["task_reward_weight"] = 0.0 cfg_amp["style_reward_weight"] = 1.0 +cfg_amp["discriminator_batch_size"] = 4096 cfg_amp["discriminator_reward_scale"] = 2 cfg_amp["discriminator_logit_regularization_scale"] = 0.05 cfg_amp["discriminator_gradient_penalty_scale"] = 5 diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index cf8f8f2e..02adee87 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -392,7 +392,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. tab:: Humanoid (AMP) - View the raw code: `ppo_humanoid.py `_ + View the raw code: `amp_humanoid.py `_ .. 
literalinclude:: ../examples/isaacgym/amp_humanoid.py :language: python diff --git a/docs/source/modules/skrl.agents.amp.rst b/docs/source/modules/skrl.agents.amp.rst index 2256f4d9..314ecf6a 100644 --- a/docs/source/modules/skrl.agents.amp.rst +++ b/docs/source/modules/skrl.agents.amp.rst @@ -33,7 +33,7 @@ Algorithm implementation | :math:`A \leftarrow \dfrac{A - \bar{A}}{A_\sigma + 10^{-8}}` | :green:`# update dataset of reference motions` -| :math:`\text{collect reference motions of}` :guilabel:`amp_batch_size` :math:`\rightarrow\;` :math:`\text{append}(M)` +| collect reference motions of size :guilabel:`amp_batch_size` :math:`\rightarrow\;` :math:`\text{append}(M)` | :green:`# compute combined rewards` | :math:`r_D \leftarrow -log(\text{max}( 1 - \hat{y}(D_\psi(s_{_{AMP}})), \, 10^{-4})) \qquad` with :math:`\; \hat{y}(x) = \dfrac{1}{1 + e^{-x}}` | :math:`r' \leftarrow` :guilabel:`task_reward_weight` :math:`r \, +` :guilabel:`style_reward_weight` :guilabel:`discriminator_reward_scale` :math:`r_D` @@ -67,9 +67,9 @@ Algorithm implementation | :math:`V_{_{predicted}} \leftarrow V + \text{clip}(V_{_{predicted}} - V, -c, c) \qquad` with :math:`c` as :guilabel:`value_clip` | :math:`L_{V_\phi} \leftarrow` :guilabel:`value_loss_scale` :math:`\frac{1}{N} \sum_{i=1}^N (R - V_{_{predicted}})^2` | :green:`# compute discriminator loss` -| :math:`{logit}_{_{AMP}} \leftarrow D_\psi(s_{_{AMP}})` -| :math:`{logit}_{_{AMP}}^{^B} \leftarrow D_\psi(s_{_{AMP}}^{^B})` -| :math:`{logit}_{_{AMP}}^{^M} \leftarrow D_\psi(s_{_{AMP}}^{^M})` +| :math:`{logit}_{_{AMP}} \leftarrow D_\psi(s_{_{AMP}}) \qquad` with :math:`s_{_{AMP}}` of size :guilabel:`discriminator_batch_size` +| :math:`{logit}_{_{AMP}}^{^B} \leftarrow D_\psi(s_{_{AMP}}^{^B}) \qquad` with :math:`s_{_{AMP}}^{^B}` of size :guilabel:`discriminator_batch_size` +| :math:`{logit}_{_{AMP}}^{^M} \leftarrow D_\psi(s_{_{AMP}}^{^M}) \qquad` with :math:`s_{_{AMP}}^{^M}` of size :guilabel:`discriminator_batch_size` | :green:`# discriminator prediction loss` | :math:`L_{D_\psi} \leftarrow \dfrac{1}{2}(BCE({logit}_{_{AMP}}` ++ :math:`{logit}_{_{AMP}}^{^B}, \, 0) + BCE({logit}_{_{AMP}}^{^M}, \, 1))` | with :math:`\; BCE(x,y)=-\frac{1}{N} \sum_{i=1}^N [y \; log(\hat{y}) + (1-y) \, log(1-\hat{y})] \;` and :math:`\; \hat{y} = \dfrac{1}{1 + e^{-x}}` @@ -97,7 +97,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/amp/amp.py :language: python - :lines: 18-67 + :lines: 18-68 :linenos: Spaces and models From 2eb42cd3745776d6dbe4b629bee96b66e1f55768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 26 Jul 2022 23:42:56 +0200 Subject: [PATCH 015/108] Add gaussian models modification to CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 196a69bc..cd35cd6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
## [0.8.0] - Unreleased ### Added - AMP agent for physics-based character animation +- Gaussian model + +### Changed +- Multivariate Gaussian model (`GaussianModel` until 0.7.0) to `MultivariateGaussianModel` ## [0.7.0] - 2022-07-11 ### Added From 1c7e986a50e19095b779f948a59a82ab4750aaf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 26 Jul 2022 23:49:52 +0200 Subject: [PATCH 016/108] Update MINOR version --- skrl/version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skrl/version.txt b/skrl/version.txt index bcaffe19..8adc70fd 100644 --- a/skrl/version.txt +++ b/skrl/version.txt @@ -1 +1 @@ -0.7.0 \ No newline at end of file +0.8.0 \ No newline at end of file From d1809889e19df9dcf7c09e80af7eaf2f43369236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 27 Jul 2022 15:13:16 +0200 Subject: [PATCH 017/108] Map model device location during loading --- skrl/models/torch/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 7cd77c5d..ad810ce0 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -302,7 +302,7 @@ def load(self, path: str) -> None: :param path: Path to load the model from :type path: str """ - self.load_state_dict(torch.load(path)) + self.load_state_dict(torch.load(path, map_location=self.device)) self.eval() def freeze_parameters(self, freeze: bool = True) -> None: From 9e9281c553ede45a554c1faefa5918ef16bf5982 Mon Sep 17 00:00:00 2001 From: Toni-SM Date: Sat, 9 Jul 2022 00:07:08 +0200 Subject: [PATCH 018/108] Create python-publish.yml --- .github/workflows/python-publish.yml | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/workflows/python-publish.yml diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 00000000..6609624f --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,47 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
+ +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package to PyPI + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + verbose: true + - name: Publish package to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository_url: https://test.pypi.org/legacy/ From 9407a0d84fb7dd12100fab629779476e664549c8 Mon Sep 17 00:00:00 2001 From: Toni-SM Date: Sat, 9 Jul 2022 00:17:07 +0200 Subject: [PATCH 019/108] Create manual.yml --- .github/workflows/manual.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/manual.yml diff --git a/.github/workflows/manual.yml b/.github/workflows/manual.yml new file mode 100644 index 00000000..9a98e4e9 --- /dev/null +++ b/.github/workflows/manual.yml @@ -0,0 +1,24 @@ +name: Manually triggered workflow +on: + workflow_dispatch: + inputs: + job: + description: 'Job to run.' + required: true + default: 'prod-deploy' +jobs: + dev-deploy: + name: Deploy dev-deploy + runs-on: ubuntu-latest + if: ${{ github.event.inputs.job == 'dev-deploy'}} + steps: + - name: ${{ github.event.inputs.job }} - step 1 + run: echo "${{ github.event.inputs.job }} triggered!" + + prod-deploy: + name: Deploy prod-deploy + runs-on: ubuntu-latest + if: ${{ github.event.inputs.job == 'prod-deploy'}} + steps: + - name: ${{ github.event.inputs.job }} - step 1 + run: echo "${{ github.event.inputs.job }} triggered!" From a9f0f02e20fe83a934fc7680472002df9f645aff Mon Sep 17 00:00:00 2001 From: Toni-SM Date: Sat, 9 Jul 2022 00:27:35 +0200 Subject: [PATCH 020/108] Update and rename manual.yml to python-publish-manual.yml --- .github/workflows/manual.yml | 24 ------------- .github/workflows/python-publish-manual.yml | 38 +++++++++++++++++++++ 2 files changed, 38 insertions(+), 24 deletions(-) delete mode 100644 .github/workflows/manual.yml create mode 100644 .github/workflows/python-publish-manual.yml diff --git a/.github/workflows/manual.yml b/.github/workflows/manual.yml deleted file mode 100644 index 9a98e4e9..00000000 --- a/.github/workflows/manual.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Manually triggered workflow -on: - workflow_dispatch: - inputs: - job: - description: 'Job to run.' - required: true - default: 'prod-deploy' -jobs: - dev-deploy: - name: Deploy dev-deploy - runs-on: ubuntu-latest - if: ${{ github.event.inputs.job == 'dev-deploy'}} - steps: - - name: ${{ github.event.inputs.job }} - step 1 - run: echo "${{ github.event.inputs.job }} triggered!" - - prod-deploy: - name: Deploy prod-deploy - runs-on: ubuntu-latest - if: ${{ github.event.inputs.job == 'prod-deploy'}} - steps: - - name: ${{ github.event.inputs.job }} - step 1 - run: echo "${{ github.event.inputs.job }} triggered!" 
diff --git a/.github/workflows/python-publish-manual.yml b/.github/workflows/python-publish-manual.yml new file mode 100644 index 00000000..58ae055d --- /dev/null +++ b/.github/workflows/python-publish-manual.yml @@ -0,0 +1,38 @@ +name: Upload Python Package (manually triggered workflow) + +on: + workflow_dispatch: + inputs: + job: + description: 'Upload Python Package to PyPI/TestPyPI' + required: true + default: 'test-pypi' + +permissions: + contents: read + +jobs: + pypi: + name: Publish package to PyPI + runs-on: ubuntu-latest + if: ${{ github.event.inputs.job == 'dev-deploy'}} + steps: + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + verbose: true + + test-pypi: + name: Publish package to TestPyPI + runs-on: ubuntu-latest + if: ${{ github.event.inputs.job == 'prod-deploy'}} + steps: + - name: Publish package to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository_url: https://test.pypi.org/legacy/ + verbose: true From f0bc89300abf520ac34a44ae8f1bf354e5a25d0c Mon Sep 17 00:00:00 2001 From: Toni-SM Date: Sat, 9 Jul 2022 00:32:24 +0200 Subject: [PATCH 021/108] Update python-publish-manual.yml --- .github/workflows/python-publish-manual.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-publish-manual.yml b/.github/workflows/python-publish-manual.yml index 58ae055d..caec0aa6 100644 --- a/.github/workflows/python-publish-manual.yml +++ b/.github/workflows/python-publish-manual.yml @@ -15,7 +15,7 @@ jobs: pypi: name: Publish package to PyPI runs-on: ubuntu-latest - if: ${{ github.event.inputs.job == 'dev-deploy'}} + if: ${{ github.event.inputs.job == 'pypi'}} steps: - name: Publish package to PyPI uses: pypa/gh-action-pypi-publish@release/v1 @@ -27,7 +27,7 @@ jobs: test-pypi: name: Publish package to TestPyPI runs-on: ubuntu-latest - if: ${{ github.event.inputs.job == 'prod-deploy'}} + if: ${{ github.event.inputs.job == 'test-pypi'}} steps: - name: Publish package to TestPyPI uses: pypa/gh-action-pypi-publish@release/v1 From c0fd1d22432b2970002d1af385dde8907b3b6841 Mon Sep 17 00:00:00 2001 From: Toni-SM Date: Sat, 9 Jul 2022 00:37:13 +0200 Subject: [PATCH 022/108] Update python-publish-manual.yml --- .github/workflows/python-publish-manual.yml | 22 +++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/python-publish-manual.yml b/.github/workflows/python-publish-manual.yml index caec0aa6..c3caae87 100644 --- a/.github/workflows/python-publish-manual.yml +++ b/.github/workflows/python-publish-manual.yml @@ -17,6 +17,17 @@ jobs: runs-on: ubuntu-latest if: ${{ github.event.inputs.job == 'pypi'}} steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.7' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build - name: Publish package to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: @@ -29,6 +40,17 @@ jobs: runs-on: ubuntu-latest if: ${{ github.event.inputs.job == 'test-pypi'}} steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.7' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build - name: Publish package to 
TestPyPI uses: pypa/gh-action-pypi-publish@release/v1 with: From 195708ff0994a8ea5c7c2494af2ab3f05551a3fa Mon Sep 17 00:00:00 2001 From: Toni-SM Date: Mon, 11 Jul 2022 23:58:48 +0200 Subject: [PATCH 023/108] Delete python-publish.yml --- .github/workflows/python-publish.yml | 47 ---------------------------- 1 file changed, 47 deletions(-) delete mode 100644 .github/workflows/python-publish.yml diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml deleted file mode 100644 index 6609624f..00000000 --- a/.github/workflows/python-publish.yml +++ /dev/null @@ -1,47 +0,0 @@ -# This workflow will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - -name: Upload Python Package - -on: - release: - types: [published] - -permissions: - contents: read - -jobs: - deploy: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package to PyPI - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} - verbose: true - - name: Publish package to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.TEST_PYPI_API_TOKEN }} - repository_url: https://test.pypi.org/legacy/ From a5cd534218435c5acf4e94c854f343d45f303fa3 Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Sun, 24 Jul 2022 13:22:56 +0200 Subject: [PATCH 024/108] Replace custom progress with cleaner tqdm --- .../examples/isaacgym/ppo_ball_balance.py | 12 +-- docs/source/examples/isaacgym/ppo_cartpole.py | 12 +-- .../examples/isaacgym/ppo_quadcopter.py | 12 +-- .../source/examples/isaacgym/trpo_cartpole.py | 12 +-- .../examples/isaacsim/isaacsim_jetbot_ppo.py | 22 +++--- .../examples/omniisaacgym/ppo_cartpole.py | 10 +-- .../examples/omniisaacgym/ppo_cartpole_mt.py | 10 +-- setup.py | 1 + skrl/trainers/torch/base.py | 76 ++++++------------- skrl/trainers/torch/parallel.py | 55 ++++++-------- skrl/trainers/torch/sequential.py | 63 ++++++++------- 11 files changed, 123 insertions(+), 162 deletions(-) diff --git a/docs/source/examples/isaacgym/ppo_ball_balance.py b/docs/source/examples/isaacgym/ppo_ball_balance.py index 1baf3895..84e9999a 100644 --- a/docs/source/examples/isaacgym/ppo_ball_balance.py +++ b/docs/source/examples/isaacgym/ppo_ball_balance.py @@ -57,7 +57,7 @@ def compute(self, states, taken_actions): # Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). +# The following lines are intended to support all versions (preview 2, 3 and 4). 
# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 try: env = load_isaacgym_env_preview4(task_name="BallBalance") # preview 3 and 4 use the same loader @@ -81,7 +81,7 @@ def compute(self, states, taken_actions): # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. @@ -115,15 +115,15 @@ def compute(self, states, taken_actions): cfg_ppo["experiment"]["checkpoint_interval"] = 200 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 4000, "headless": True, "progress_interval": 400} +cfg_trainer = {"timesteps": 4000, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training diff --git a/docs/source/examples/isaacgym/ppo_cartpole.py b/docs/source/examples/isaacgym/ppo_cartpole.py index 8933f319..260a298e 100644 --- a/docs/source/examples/isaacgym/ppo_cartpole.py +++ b/docs/source/examples/isaacgym/ppo_cartpole.py @@ -53,7 +53,7 @@ def compute(self, states, taken_actions): # Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). +# The following lines are intended to support all versions (preview 2, 3 and 4). # It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 try: env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader @@ -77,7 +77,7 @@ def compute(self, states, taken_actions): # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. @@ -111,15 +111,15 @@ def compute(self, states, taken_actions): cfg_ppo["experiment"]["checkpoint_interval"] = 80 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 1600, "headless": True, "progress_interval": 160} +cfg_trainer = {"timesteps": 1600, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training diff --git a/docs/source/examples/isaacgym/ppo_quadcopter.py b/docs/source/examples/isaacgym/ppo_quadcopter.py index b7ace921..883e31f4 100644 --- a/docs/source/examples/isaacgym/ppo_quadcopter.py +++ b/docs/source/examples/isaacgym/ppo_quadcopter.py @@ -57,7 +57,7 @@ def compute(self, states, taken_actions): # Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). +# The following lines are intended to support all versions (preview 2, 3 and 4). 
# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 try: env = load_isaacgym_env_preview4(task_name="Quadcopter") # preview 3 and 4 use the same loader @@ -81,7 +81,7 @@ def compute(self, states, taken_actions): # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. @@ -115,15 +115,15 @@ def compute(self, states, taken_actions): cfg_ppo["experiment"]["checkpoint_interval"] = 200 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 4000, "headless": True, "progress_interval": 400} +cfg_trainer = {"timesteps": 4000, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training diff --git a/docs/source/examples/isaacgym/trpo_cartpole.py b/docs/source/examples/isaacgym/trpo_cartpole.py index 8a51f840..31266a55 100644 --- a/docs/source/examples/isaacgym/trpo_cartpole.py +++ b/docs/source/examples/isaacgym/trpo_cartpole.py @@ -52,7 +52,7 @@ def compute(self, states, taken_actions): # Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). +# The following lines are intended to support all versions (preview 2, 3 and 4). # It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 try: env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader @@ -76,7 +76,7 @@ def compute(self, states, taken_actions): # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_trpo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. @@ -98,15 +98,15 @@ def compute(self, states, taken_actions): cfg_trpo["experiment"]["checkpoint_interval"] = 125 agent = TRPO(models=models_trpo, - memory=memory, - cfg=cfg_trpo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_trpo, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 2500, "headless": True, "progress_interval": 250} +cfg_trainer = {"timesteps": 2500, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training diff --git a/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py b/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py index 672fa3c6..539d69b9 100644 --- a/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py +++ b/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py @@ -12,7 +12,7 @@ from skrl.envs.torch import wrap_env -# Define the models (stochastic and deterministic models) for the agent using helper classes +# Define the models (stochastic and deterministic models) for the agent using helper classes # and programming with two approaches (layer by layer and torch.nn.Sequential class). 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy @@ -41,8 +41,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) def compute(self, states, taken_actions): - # view (samples, width * height * channels) -> (samples, width, height, channels) - # permute (samples, width, height, channels) -> (samples, channels, width, height) + # view (samples, width * height * channels) -> (samples, width, height, channels) + # permute (samples, width, height, channels) -> (samples, channels, width, height) x = self.net(states.view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)) return 10 * torch.tanh(x), self.log_std_parameter # JetBotEnv action_space is -10 to 10 @@ -66,10 +66,10 @@ def __init__(self, observation_space, action_space, device, clip_actions = False nn.Linear(64, 32), nn.Tanh(), nn.Linear(32, 1)) - + def compute(self, states, taken_actions): - # view (samples, width * height * channels) -> (samples, width, height, channels) - # permute (samples, width, height, channels) -> (samples, channels, width, height) + # view (samples, width * height * channels) -> (samples, width, height, channels) + # permute (samples, width, height, channels) -> (samples, channels, width, height) return self.net(states.view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)) @@ -92,7 +92,7 @@ def compute(self, states, taken_actions): # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. @@ -120,15 +120,15 @@ def compute(self, states, taken_actions): cfg_ppo["experiment"]["checkpoint_interval"] = 10000 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instanciate the RL trainer -cfg_trainer = {"timesteps": 500000, "headless": True, "progress_interval": 10000} +cfg_trainer = {"timesteps": 500000, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole.py b/docs/source/examples/omniisaacgym/ppo_cartpole.py index 034b31c0..789dfbff 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole.py @@ -68,7 +68,7 @@ def compute(self, states, taken_actions): # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. 
@@ -98,15 +98,15 @@ def compute(self, states, taken_actions): cfg_ppo["experiment"]["checkpoint_interval"] = 80 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 1600, "headless": True, "progress_interval": 160} +cfg_trainer = {"timesteps": 1600, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py index 888caaeb..065b7826 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py @@ -70,7 +70,7 @@ def compute(self, states, taken_actions): # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. @@ -100,15 +100,15 @@ def compute(self, states, taken_actions): cfg_ppo["experiment"]["checkpoint_interval"] = 80 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 1600, "headless": True, "progress_interval": 160} +cfg_trainer = {"timesteps": 1600, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training in a separate thread diff --git a/setup.py b/setup.py index c48d99ac..d9bf4aa1 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ "gym", "torch", "tensorboard", + "tqdm", ] # installation diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index 1b7560b4..9635c42d 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -3,6 +3,7 @@ import time import torch +from tqdm import tqdm from ...envs.torch import Wrapper from ...agents.torch import Agent @@ -31,10 +32,10 @@ def generate_equally_spaced_scopes(num_envs: int, num_agents: int) -> List[int]: class Trainer(): - def __init__(self, - cfg: dict, - env: Wrapper, - agents: Union[Agent, List[Agent], List[List[Agent]]], + def __init__(self, + cfg: dict, + env: Wrapper, + agents: Union[Agent, List[Agent], List[List[Agent]]], agents_scope : List[int] = []) -> None: """Base class for trainers @@ -51,11 +52,10 @@ def __init__(self, self.env = env self.agents = agents self.agents_scope = agents_scope - + # get configuration self.timesteps = self.cfg.get('timesteps', 0) self.headless = self.cfg.get("headless", False) - self.progress_interval = self.cfg.get("progress_interval", 1000) self.initial_timestep = 0 @@ -116,7 +116,7 @@ def _setup_agents(self) -> None: raise ValueError("The scopes ({}) don't cover the number of parallelizable environments ({})" \ .format(sum(self.agents_scope), self.env.num_envs)) # generate agents' scopes - index = 0 + index = 0 for i in range(len(self.agents_scope)): index += self.agents_scope[i] self.agents_scope[i] = (index - self.agents_scope[i], index) @@ -124,34 +124,6 @@ def _setup_agents(self) -> None: raise ValueError("A list of agents is expected") else: self.num_agents = 1 - - def 
show_progress(self, timestep: int, timesteps: int) -> None: - """Show training progress - - :param timestep: Current timestep - :type timestep: int - :param timesteps: Number of timesteps - :type timesteps: int - """ - if timestep > 0: - timestep += 1 - - if not timestep % self.progress_interval: - current_timestamp = time.time() - if self._timestamp is None: - self._timestamp = current_timestamp - self._timestamp_elapsed = self._timestamp - - delta = current_timestamp - self._timestamp - elapsed = current_timestamp - self._timestamp_elapsed if timestep else 0.0 - remaining = elapsed * (self.timesteps / timestep - 1) if timestep else 0.0 - - self._timestamp = current_timestamp - - print("|--------------------------|--------------------------|") - print("| timestep / timesteps | {} / {}".format(timestep, self.timesteps)) - print("| timesteps per second |", round(self.progress_interval / delta, 2) if timestep else 0.0) - print("| elapsed / remaining time | {} sec / {} sec".format(round(elapsed, 2), round(remaining, 2))) def train(self) -> None: """Train the agents @@ -170,7 +142,7 @@ def eval(self) -> None: def start(self) -> None: """Start training - This method is deprecated in favour of the '.train()' method + This method is deprecated in favour of the '.train()' method """ # TODO: remove this method in future versions print("[WARNING] Trainer.start() method is deprecated in favour of the '.train()' method") @@ -193,27 +165,25 @@ def single_agent_train(self) -> None: # reset env states = self.env.reset() - for timestep in range(self.initial_timestep, self.timesteps): - # show progress - self.show_progress(timestep=timestep, timesteps=self.timesteps) + for timestep in tqdm(range(self.initial_timestep, self.timesteps)): # pre-interaction self.agents.pre_interaction(timestep=timestep, timesteps=self.timesteps) - + # compute actions with torch.no_grad(): actions, _, _ = self.agents.act(states, inference=True, timestep=timestep, timesteps=self.timesteps) - + # step the environments next_states, rewards, dones, infos = self.env.step(actions) - + # render scene if not self.headless: self.env.render() # record the environments' transitions with torch.no_grad(): - self.agents.record_transition(states=states, + self.agents.record_transition(states=states, actions=actions, rewards=rewards, next_states=next_states, @@ -221,10 +191,10 @@ def single_agent_train(self) -> None: infos=infos, timestep=timestep, timesteps=self.timesteps) - + # post-interaction self.agents.post_interaction(timestep=timestep, timesteps=self.timesteps) - + # reset environments with torch.no_grad(): if dones.any(): @@ -239,7 +209,7 @@ def single_agent_eval(self) -> None: """Evaluate the agents sequentially This method executes the following steps in loop: - + - Compute actions (sequentially) - Interact with the environments - Render scene @@ -250,24 +220,22 @@ def single_agent_eval(self) -> None: # reset env states = self.env.reset() - for timestep in range(self.initial_timestep, self.timesteps): - # show progress - self.show_progress(timestep=timestep, timesteps=self.timesteps) - + for timestep in tqdm(range(self.initial_timestep, self.timesteps)): + # compute actions with torch.no_grad(): actions, _, _ = self.agents.act(states, inference=True, timestep=timestep, timesteps=self.timesteps) - + # step the environments next_states, rewards, dones, infos = self.env.step(actions) - + # render scene if not self.headless: self.env.render() - + with torch.no_grad(): # write data to TensorBoard - super(type(self.agents), 
self.agents).record_transition(states=states, + super(type(self.agents), self.agents).record_transition(states=states, actions=actions, rewards=rewards, next_states=next_states, diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index 60742017..17d1e72a 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -38,7 +38,7 @@ def fn_processor(process_index, *args): agent.init() print("[INFO] Processor {}: init agent {} with scope {}".format(process_index, type(agent).__name__, scope)) barrier.wait() - + # execute agent's pre-interaction step elif task == "pre_interaction": agent.pre_interaction(timestep=msg['timestep'], timesteps=msg['timesteps']) @@ -48,9 +48,9 @@ def fn_processor(process_index, *args): elif task == "act": _states = queue.get()[scope[0]:scope[1]] with torch.no_grad(): - _actions = agent.act(_states, + _actions = agent.act(_states, inference=True, - timestep=msg['timestep'], + timestep=msg['timestep'], timesteps=msg['timesteps'])[0] if not _actions.is_cuda: _actions.share_memory_() @@ -60,7 +60,7 @@ def fn_processor(process_index, *args): # record agent's experience elif task == "record_transition": with torch.no_grad(): - agent.record_transition(states=_states, + agent.record_transition(states=_states, actions=_actions, rewards=queue.get()[scope[0]:scope[1]], next_states=queue.get()[scope[0]:scope[1]], @@ -69,7 +69,7 @@ def fn_processor(process_index, *args): timestep=msg['timestep'], timesteps=msg['timesteps']) barrier.wait() - + # execute agent's post-interaction step elif task == "post_interaction": agent.post_interaction(timestep=msg['timestep'], timesteps=msg['timesteps']) @@ -78,7 +78,7 @@ def fn_processor(process_index, *args): # write data to TensorBoard (evaluation) elif task == "eval-record_transition-post_interaction": with torch.no_grad(): - super(type(agent), agent).record_transition(states=_states, + super(type(agent), agent).record_transition(states=_states, actions=_actions, rewards=queue.get()[scope[0]:scope[1]], next_states=queue.get()[scope[0]:scope[1]], @@ -91,13 +91,13 @@ def fn_processor(process_index, *args): class ParallelTrainer(Trainer): - def __init__(self, - cfg: dict, - env: Wrapper, - agents: Union[Agent, List[Agent], List[List[Agent]]], + def __init__(self, + cfg: dict, + env: Wrapper, + agents: Union[Agent, List[Agent], List[List[Agent]]], agents_scope : List[int] = []) -> None: """Parallel trainer - + Train agents in parallel using multiple processes :param cfg: Configuration dictionary @@ -138,7 +138,7 @@ def train(self) -> None: consumer_pipes = [] barrier = mp.Barrier(self.num_agents + 1) processes = [] - + for i in range(self.num_agents): pipe_read, pipe_write = mp.Pipe(duplex=False) producer_pipes.append(pipe_write) @@ -175,9 +175,7 @@ def train(self) -> None: if not states.is_cuda: states.share_memory_() - for timestep in range(self.initial_timestep, self.timesteps): - # show progress - self.show_progress(timestep=timestep, timesteps=self.timesteps) + for timestep in tqdm(range(self.initial_timestep, self.timesteps)): # pre-interaction for pipe in producer_pipes: @@ -192,10 +190,10 @@ def train(self) -> None: barrier.wait() actions = torch.vstack([queue.get() for queue in queues]) - + # step the environments next_states, rewards, dones, infos = self.env.step(actions) - + # render scene if not self.headless: self.env.render() @@ -208,7 +206,7 @@ def train(self) -> None: next_states.share_memory_() if not dones.is_cuda: dones.share_memory_() - + for pipe, queue in 
zip(producer_pipes, queues): pipe.send({"task": "record_transition", "timestep": timestep, "timesteps": self.timesteps}) queue.put(rewards) @@ -234,7 +232,7 @@ def train(self) -> None: # terminate processes for pipe in producer_pipes: pipe.send({"task": "terminate"}) - + # join processes for process in processes: process.join() @@ -264,7 +262,7 @@ def eval(self) -> None: consumer_pipes = [] barrier = mp.Barrier(self.num_agents + 1) processes = [] - + for i in range(self.num_agents): pipe_read, pipe_write = mp.Pipe(duplex=False) producer_pipes.append(pipe_write) @@ -302,9 +300,7 @@ def eval(self) -> None: if not states.is_cuda: states.share_memory_() - for timestep in range(self.initial_timestep, self.timesteps): - # show progress - self.show_progress(timestep=timestep, timesteps=self.timesteps) + for timestep in tqdm(range(self.initial_timestep, self.timesteps)): # compute actions with torch.no_grad(): @@ -314,10 +310,10 @@ def eval(self) -> None: barrier.wait() actions = torch.vstack([queue.get() for queue in queues]) - + # step the environments next_states, rewards, dones, infos = self.env.step(actions) - + # render scene if not self.headless: self.env.render() @@ -330,10 +326,10 @@ def eval(self) -> None: next_states.share_memory_() if not dones.is_cuda: dones.share_memory_() - + for pipe, queue in zip(producer_pipes, queues): - pipe.send({"task": "eval-record_transition-post_interaction", - "timestep": timestep, + pipe.send({"task": "eval-record_transition-post_interaction", + "timestep": timestep, "timesteps": self.timesteps}) queue.put(rewards) queue.put(next_states) @@ -352,11 +348,10 @@ def eval(self) -> None: # terminate processes for pipe in producer_pipes: pipe.send({"task": "terminate"}) - + # join processes for process in processes: process.join() # close the environment self.env.close() - \ No newline at end of file diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index 22618d41..eed888e8 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -1,6 +1,7 @@ from typing import Union, List import torch +from tqdm import tqdm from ...envs.torch import Wrapper from ...agents.torch import Agent @@ -9,13 +10,13 @@ class SequentialTrainer(Trainer): - def __init__(self, - cfg: dict, - env: Wrapper, - agents: Union[Agent, List[Agent], List[List[Agent]]], + def __init__(self, + cfg: dict, + env: Wrapper, + agents: Union[Agent, List[Agent], List[List[Agent]]], agents_scope : List[int] = []) -> None: """Sequential trainer - + Train agents sequentially (i.e., one after the other in each interaction with the environment) :param cfg: Configuration dictionary @@ -57,25 +58,23 @@ def train(self) -> None: # reset env states = self.env.reset() - for timestep in range(self.initial_timestep, self.timesteps): - # show progress - self.show_progress(timestep=timestep, timesteps=self.timesteps) + for timestep in tqdm(range(self.initial_timestep, self.timesteps)): # pre-interaction for agent in self.agents: agent.pre_interaction(timestep=timestep, timesteps=self.timesteps) - + # compute actions with torch.no_grad(): - actions = torch.vstack([agent.act(states[scope[0]:scope[1]], + actions = torch.vstack([agent.act(states[scope[0]:scope[1]], inference=True, - timestep=timestep, + timestep=timestep, timesteps=self.timesteps)[0] \ for agent, scope in zip(self.agents, self.agents_scope)]) - + # step the environments next_states, rewards, dones, infos = self.env.step(actions) - + # render scene if not self.headless: self.env.render() @@ 
-83,15 +82,15 @@ def train(self) -> None: # record the environments' transitions with torch.no_grad(): for agent, scope in zip(self.agents, self.agents_scope): - agent.record_transition(states=states[scope[0]:scope[1]], - actions=actions[scope[0]:scope[1]], - rewards=rewards[scope[0]:scope[1]], - next_states=next_states[scope[0]:scope[1]], + agent.record_transition(states=states[scope[0]:scope[1]], + actions=actions[scope[0]:scope[1]], + rewards=rewards[scope[0]:scope[1]], + next_states=next_states[scope[0]:scope[1]], dones=dones[scope[0]:scope[1]], infos=infos, timestep=timestep, timesteps=self.timesteps) - + # post-interaction for agent in self.agents: agent.post_interaction(timestep=timestep, timesteps=self.timesteps) @@ -110,7 +109,7 @@ def eval(self) -> None: """Evaluate the agents sequentially This method executes the following steps in loop: - + - Compute actions (sequentially) - Interact with the environments - Render scene @@ -124,32 +123,30 @@ def eval(self) -> None: # reset env states = self.env.reset() - for timestep in range(self.initial_timestep, self.timesteps): - # show progress - self.show_progress(timestep=timestep, timesteps=self.timesteps) - + for timestep in tqdm(range(self.initial_timestep, self.timesteps)): + # compute actions with torch.no_grad(): - actions = torch.vstack([agent.act(states[scope[0]:scope[1]], + actions = torch.vstack([agent.act(states[scope[0]:scope[1]], inference=True, - timestep=timestep, + timestep=timestep, timesteps=self.timesteps)[0] \ for agent, scope in zip(self.agents, self.agents_scope)]) - + # step the environments next_states, rewards, dones, infos = self.env.step(actions) - + # render scene if not self.headless: self.env.render() - + with torch.no_grad(): # write data to TensorBoard for agent, scope in zip(self.agents, self.agents_scope): - super(type(agent), agent).record_transition(states=states[scope[0]:scope[1]], - actions=actions[scope[0]:scope[1]], - rewards=rewards[scope[0]:scope[1]], - next_states=next_states[scope[0]:scope[1]], + super(type(agent), agent).record_transition(states=states[scope[0]:scope[1]], + actions=actions[scope[0]:scope[1]], + rewards=rewards[scope[0]:scope[1]], + next_states=next_states[scope[0]:scope[1]], dones=dones[scope[0]:scope[1]], infos=infos, timestep=timestep, @@ -168,7 +165,7 @@ def eval(self) -> None: def start(self) -> None: """Start training - This method is deprecated in favour of the '.train()' method + This method is deprecated in favour of the '.train()' method """ super().start() self.train() From 1002e0ec440049cca4c0be39f5a155ac819783a7 Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Thu, 28 Jul 2022 09:02:02 +0200 Subject: [PATCH 025/108] Remove remnant files from wrong git operations --- .github/workflows/python-publish-manual.yml | 60 --------------------- 1 file changed, 60 deletions(-) delete mode 100644 .github/workflows/python-publish-manual.yml diff --git a/.github/workflows/python-publish-manual.yml b/.github/workflows/python-publish-manual.yml deleted file mode 100644 index c3caae87..00000000 --- a/.github/workflows/python-publish-manual.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: Upload Python Package (manually triggered workflow) - -on: - workflow_dispatch: - inputs: - job: - description: 'Upload Python Package to PyPI/TestPyPI' - required: true - default: 'test-pypi' - -permissions: - contents: read - -jobs: - pypi: - name: Publish package to PyPI - runs-on: ubuntu-latest - if: ${{ github.event.inputs.job == 'pypi'}} - steps: - - uses: actions/checkout@v3 - - name: Set 
up Python - uses: actions/setup-python@v3 - with: - python-version: '3.7' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} - verbose: true - - test-pypi: - name: Publish package to TestPyPI - runs-on: ubuntu-latest - if: ${{ github.event.inputs.job == 'test-pypi'}} - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.7' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.TEST_PYPI_API_TOKEN }} - repository_url: https://test.pypi.org/legacy/ - verbose: true From a54f69b3848e8de150b4cace1f22e38b7d3784c7 Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Thu, 28 Jul 2022 13:29:13 +0200 Subject: [PATCH 026/108] Reorder imports according to skrl standards --- skrl/trainers/torch/base.py | 2 +- skrl/trainers/torch/sequential.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index 9635c42d..676215ab 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -1,9 +1,9 @@ from typing import Union, List import time +from tqdm import tqdm import torch -from tqdm import tqdm from ...envs.torch import Wrapper from ...agents.torch import Agent diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index eed888e8..b9a3ca57 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -1,8 +1,9 @@ from typing import Union, List -import torch from tqdm import tqdm +import torch + from ...envs.torch import Wrapper from ...agents.torch import Agent From 323b8ea8e199202a5b42f5c284e4bc3f55473752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 31 Jul 2022 09:58:28 +0200 Subject: [PATCH 027/108] Update CONTRIBUTING.md --- CONTRIBUTING.md | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1abbf65a..53004b68 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,13 +3,15 @@ First of all, **thank you**... For what? Because you are dedicating some time to
-### I don't want to contribute (for now), I just want to ask a question! +### I just want to ask a question! If you have a question, please do not open an issue for this. Instead, use the following resources for it (you will get a faster response): - [skrl's GitHub discussions](https://github.com/Toni-SM/skrl/discussions), a place to ask questions and discuss the project -- [Isaac Gym's forum](https://forums.developer.nvidia.com/c/agx-autonomous-machines/isaac/isaac-gym/322), , a place to post your questions, find past answers, or just chat with other members of the community about Isaac Gym topics +- [Isaac Gym's forum](https://forums.developer.nvidia.com/c/agx-autonomous-machines/isaac/isaac-gym/322), a place to post your questions, find past answers, or just chat with other members of the community about Isaac Gym topics + +- [Omniverse Isaac Sim's forum](https://forums.developer.nvidia.com/c/agx-autonomous-machines/isaac/simulation/69), a place to post your questions, find past answers, or just chat with other members of the community about Omniverse Isaac Sim/Gym topics ### I have found a (good) bug. What can I do? @@ -21,10 +23,16 @@ Open an issue on [skrl's GitHub issues](https://github.com/Toni-SM/skrl/issues) - A link to the source code of the library that you are using (some problems may be due to the use of older versions. If possible, always use the latest version) - Any other information that you think may be useful or help to reproduce/describe the problem -Note: Changes that are cosmetic in nature (code formatting, removing whitespace, etc.) or that correct grammatical, spelling or typo errors, and that do not add anything substantial to the functionality of the library will generally not be accepted as a pull request - ### I want to contribute, but I don't know how +There is a [board](https://github.com/users/Toni-SM/projects/2/views/8) containing relevant future implementations which can be a good starting place to identify contributions. Please consider the following points: + +#### Notes about contributing + +- Try to **communicate your change first** to [discuss](https://github.com/Toni-SM/skrl/discussions) the implementation if you want to add a new feature or change an existing one +- Modify only the minimum amount of code required and the files needed to make the change +- Changes that are cosmetic in nature (code formatting, removing whitespace, etc.) or that correct grammatical, spelling or typo errors, and that do not add anything substantial to the functionality of the library will generally not be accepted as a pull request + #### Coding conventions **skrl** is designed with a focus on modularity, readability, simplicity and transparency of algorithm implementation. The file system structure groups components according to their functionality. Library components only inherit (and must inherit) from a single base class (no multilevel or multiple inheritance) that provides a uniform interface and implements common functionality that is not tied to the implementation details of the algorithms Read the code a little bit and you will understand it at first glance... Also - Capitalize (the first letter) and omit any trailing punctuation - Write it in the imperative tense - Aim for about 50 (or 72) characters +- Add import statements at the top of each module as follows: + + ```ini + function annotation (e.g. typing) + # insert an empty line + python libraries and other libraries (e.g. gym, numpy, time, etc.) 
+ # insert an empty line + machine learning framework modules (e.g. torch, torch.nn) + # insert an empty line + skrl components + ```
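For illustration only, a module header that follows this import-ordering convention might look like the sketch below (the particular imports are examples chosen for this snippet, not a requirement of the library):

```python
# hypothetical module header following the import-ordering convention described above

# function annotations (typing)
from typing import Union, List, Optional

# python libraries and other libraries
import copy
import gym
import tqdm

# machine learning framework modules
import torch
import torch.nn as nn

# skrl components
from skrl.envs.torch import Wrapper
from skrl.agents.torch import Agent
```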
From 2008209a2c17f43394abda547e3806d2620d1483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 31 Jul 2022 12:54:47 +0200 Subject: [PATCH 028/108] Add manual trainer --- skrl/trainers/torch/__init__.py | 3 +- skrl/trainers/torch/manual.py | 219 ++++++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 skrl/trainers/torch/manual.py diff --git a/skrl/trainers/torch/__init__.py b/skrl/trainers/torch/__init__.py index 26232c81..1077b277 100644 --- a/skrl/trainers/torch/__init__.py +++ b/skrl/trainers/torch/__init__.py @@ -2,4 +2,5 @@ from .base import generate_equally_spaced_scopes from .sequential import SequentialTrainer -from .parallel import ParallelTrainer \ No newline at end of file +from .parallel import ParallelTrainer +from .manual import ManualTrainer diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py new file mode 100644 index 00000000..e07ab75b --- /dev/null +++ b/skrl/trainers/torch/manual.py @@ -0,0 +1,219 @@ +from typing import Union, List, Optional + +import tqdm + +import torch + +from ...envs.torch import Wrapper +from ...agents.torch import Agent + +from . import Trainer + + +class ManualTrainer(Trainer): + def __init__(self, + cfg: dict, + env: Wrapper, + agents: Union[Agent, List[Agent], List[List[Agent]]], + agents_scope : List[int] = []) -> None: + """Manual trainer + + Train agents by manually controlling the training/evaluation loop + + :param cfg: Configuration dictionary + :type cfg: dict + :param env: Environment to train on + :type env: skrl.env.torch.Wrapper + :param agents: Agents to train + :type agents: Union[Agent, List[Agent]] + :param agents_scope: Number of environments for each agent to train on (default: []) + :type agents_scope: tuple or list of integers + """ + # TODO: close the environment + super().__init__(cfg, env, agents, agents_scope) + + # init agents + if self.num_agents > 1: + for agent in self.agents: + agent.init() + else: + self.agents.init() + + self._progress = None + + self.states = None + + def train(self, timestep: int, timesteps: Optional[int] = None) -> None: + """Execute a training iteration + + This method executes the following steps once: + + - Pre-interaction (sequentially if num_agents > 1) + - Compute actions (sequentially if num_agents > 1) + - Interact with the environments + - Render scene + - Record transitions (sequentially if num_agents > 1) + - Post-interaction (sequentially if num_agents > 1) + - Reset environments + + :param timestep: Current timestep + :type timestep: int + :param timesteps: Total number of timesteps (default: None). 
+ If None, the total number of timesteps is obtained from the trainer's config + :type timesteps: int, optional + """ + timesteps = self.timesteps if timesteps is None else timesteps + + if self._progress is None: + self._progress = tqdm.tqdm(total=timesteps) + self._progress.update(n=1) + + # reset env + if self.states is None: + self.states = self.env.reset() + + if self.num_agents == 1: + # pre-interaction + self.agents.pre_interaction(timestep=timestep, timesteps=timesteps) + + # compute actions + with torch.no_grad(): + actions, _, _ = self.agents.act(self.states, inference=True, timestep=timestep, timesteps=timesteps) + + else: + # pre-interaction + for agent in self.agents: + agent.pre_interaction(timestep=timestep, timesteps=timesteps) + + # compute actions + with torch.no_grad(): + actions = torch.vstack([agent.act(self.states[scope[0]:scope[1]], + inference=True, + timestep=timestep, + timesteps=timesteps)[0] \ + for agent, scope in zip(self.agents, self.agents_scope)]) + + # step the environments + next_states, rewards, dones, infos = self.env.step(actions) + + # render scene + if not self.headless: + self.env.render() + + if self.num_agents == 1: + # record the environments' transitions + with torch.no_grad(): + self.agents.record_transition(states=self.states, + actions=actions, + rewards=rewards, + next_states=next_states, + dones=dones, + infos=infos, + timestep=timestep, + timesteps=timesteps) + + # post-interaction + self.agents.post_interaction(timestep=timestep, timesteps=timesteps) + + else: + # record the environments' transitions + with torch.no_grad(): + for agent, scope in zip(self.agents, self.agents_scope): + agent.record_transition(states=self.states[scope[0]:scope[1]], + actions=actions[scope[0]:scope[1]], + rewards=rewards[scope[0]:scope[1]], + next_states=next_states[scope[0]:scope[1]], + dones=dones[scope[0]:scope[1]], + infos=infos, + timestep=timestep, + timesteps=timesteps) + + # post-interaction + for agent in self.agents: + agent.post_interaction(timestep=timestep, timesteps=timesteps) + + # reset environments + with torch.no_grad(): + if dones.any(): + self.states = self.env.reset() + else: + self.states.copy_(next_states) + + + def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: + """Evaluate the agents sequentially + + This method executes the following steps in loop: + + - Compute actions (sequentially if num_agents > 1) + - Interact with the environments + - Render scene + - Reset environments + + :param timestep: Current timestep + :type timestep: int + :param timesteps: Total number of timesteps (default: None). 
+ If None, the total number of timesteps is obtained from the trainer's config + :type timesteps: int, optional + """ + timesteps = self.timesteps if timesteps is None else timesteps + + if self._progress is None: + self._progress = tqdm.tqdm(total=timesteps) + self._progress.update(n=1) + + # reset env + if self.states is None: + self.states = self.env.reset() + + with torch.no_grad(): + if self.num_agents == 1: + # compute actions + actions, _, _ = self.agents.act(self.states, inference=True, timestep=timestep, timesteps=timesteps) + + else: + # compute actions + actions = torch.vstack([agent.act(self.states[scope[0]:scope[1]], + inference=True, + timestep=timestep, + timesteps=timesteps)[0] \ + for agent, scope in zip(self.agents, self.agents_scope)]) + + # step the environments + next_states, rewards, dones, infos = self.env.step(actions) + + # render scene + if not self.headless: + self.env.render() + + with torch.no_grad(): + if self.num_agents == 1: + # write data to TensorBoard + super(type(self.agents), self.agents).record_transition(states=self.states, + actions=actions, + rewards=rewards, + next_states=next_states, + dones=dones, + infos=infos, + timestep=timestep, + timesteps=timesteps) + super(type(self.agents), self.agents).post_interaction(timestep=timestep, timesteps=timesteps) + + else: + # write data to TensorBoard + for agent, scope in zip(self.agents, self.agents_scope): + super(type(agent), agent).record_transition(states=self.states[scope[0]:scope[1]], + actions=actions[scope[0]:scope[1]], + rewards=rewards[scope[0]:scope[1]], + next_states=next_states[scope[0]:scope[1]], + dones=dones[scope[0]:scope[1]], + infos=infos, + timestep=timestep, + timesteps=timesteps) + super(type(agent), agent).post_interaction(timestep=timestep, timesteps=timesteps) + + # reset environments + if dones.any(): + self.states = self.env.reset() + else: + self.states.copy_(next_states) From 43b660e4288c15298eabb095b1604293a22fbf23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 31 Jul 2022 14:33:48 +0200 Subject: [PATCH 029/108] Change trainer's cfg parameter position and use default config --- skrl/trainers/torch/base.py | 20 ++++++++------------ skrl/trainers/torch/manual.py | 23 ++++++++++++++++------- skrl/trainers/torch/parallel.py | 28 ++++++++++++++++++++-------- skrl/trainers/torch/sequential.py | 28 +++++++++++++++++++--------- 4 files changed, 63 insertions(+), 36 deletions(-) diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index 676215ab..538e08a1 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -1,7 +1,6 @@ from typing import Union, List -import time -from tqdm import tqdm +import tqdm import torch @@ -33,20 +32,20 @@ def generate_equally_spaced_scopes(num_envs: int, num_agents: int) -> List[int]: class Trainer(): def __init__(self, - cfg: dict, env: Wrapper, - agents: Union[Agent, List[Agent], List[List[Agent]]], - agents_scope : List[int] = []) -> None: + agents: Union[Agent, List[Agent]], + agents_scope : List[int] = [], + cfg: dict = {}) -> None: """Base class for trainers - :param cfg: Configuration dictionary - :type cfg: dict :param env: Environment to train on :type env: skrl.env.torch.Wrapper :param agents: Agents to train :type agents: Union[Agent, List[Agent]] :param agents_scope: Number of environments for each agent to train on (default: []) :type agents_scope: tuple or list of integers + :param cfg: Configuration dictionary (default: {}) + :type cfg: dict, optional """ self.cfg = 
cfg self.env = env @@ -59,9 +58,6 @@ def __init__(self, self.initial_timestep = 0 - self._timestamp = None - self._timestamp_elapsed = None - # setup agents self.num_agents = 0 self._setup_agents() @@ -165,7 +161,7 @@ def single_agent_train(self) -> None: # reset env states = self.env.reset() - for timestep in tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): # pre-interaction self.agents.pre_interaction(timestep=timestep, timesteps=self.timesteps) @@ -220,7 +216,7 @@ def single_agent_eval(self) -> None: # reset env states = self.env.reset() - for timestep in tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): # compute actions with torch.no_grad(): diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index e07ab75b..938fdc80 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -1,5 +1,6 @@ from typing import Union, List, Optional +import copy import tqdm import torch @@ -10,27 +11,35 @@ from . import Trainer +MANUAL_TRAINER_DEFAULT_CONFIG = { + "timesteps": 100000, # number of timesteps to train for + "headless": False, # whether to use headless mode (no rendering) +} + + class ManualTrainer(Trainer): def __init__(self, - cfg: dict, env: Wrapper, - agents: Union[Agent, List[Agent], List[List[Agent]]], - agents_scope : List[int] = []) -> None: + agents: Union[Agent, List[Agent]], + agents_scope : List[int] = [], + cfg: dict = {}) -> None: """Manual trainer Train agents by manually controlling the training/evaluation loop - :param cfg: Configuration dictionary - :type cfg: dict :param env: Environment to train on :type env: skrl.env.torch.Wrapper :param agents: Agents to train :type agents: Union[Agent, List[Agent]] :param agents_scope: Number of environments for each agent to train on (default: []) :type agents_scope: tuple or list of integers + :param cfg: Configuration dictionary (default: {}). + See MANUAL_TRAINER_DEFAULT_CONFIG for default values + :type cfg: dict, optional """ - # TODO: close the environment - super().__init__(cfg, env, agents, agents_scope) + _cfg = copy.deepcopy(MANUAL_TRAINER_DEFAULT_CONFIG) + _cfg.update(cfg) + super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) # init agents if self.num_agents > 1: diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index 17d1e72a..c5526223 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -1,5 +1,8 @@ from typing import Union, List +import copy +import tqdm + import torch import torch.multiprocessing as mp @@ -9,6 +12,12 @@ from . 
import Trainer +PARALLEL_TRAINER_DEFAULT_CONFIG = { + "timesteps": 100000, # number of timesteps to train for + "headless": False, # whether to use headless mode (no rendering) +} + + def fn_processor(process_index, *args): print("[INFO] Processor {}: started".format(process_index)) @@ -92,24 +101,27 @@ def fn_processor(process_index, *args): class ParallelTrainer(Trainer): def __init__(self, - cfg: dict, env: Wrapper, - agents: Union[Agent, List[Agent], List[List[Agent]]], - agents_scope : List[int] = []) -> None: + agents: Union[Agent, List[Agent]], + agents_scope : List[int] = [], + cfg: dict = {}) -> None: """Parallel trainer Train agents in parallel using multiple processes - :param cfg: Configuration dictionary - :type cfg: dict :param env: Environment to train on :type env: skrl.env.torch.Wrapper :param agents: Agents to train :type agents: Union[Agent, List[Agent]] :param agents_scope: Number of environments for each agent to train on (default: []) :type agents_scope: tuple or list of integers + :param cfg: Configuration dictionary (default: {}). + See PARALLEL_TRAINER_DEFAULT_CONFIG for default values + :type cfg: dict, optional """ - super().__init__(cfg, env, agents, agents_scope) + _cfg = copy.deepcopy(PARALLEL_TRAINER_DEFAULT_CONFIG) + _cfg.update(cfg) + super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) mp.set_start_method(method='spawn', force=True) @@ -175,7 +187,7 @@ def train(self) -> None: if not states.is_cuda: states.share_memory_() - for timestep in tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): # pre-interaction for pipe in producer_pipes: @@ -300,7 +312,7 @@ def eval(self) -> None: if not states.is_cuda: states.share_memory_() - for timestep in tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): # compute actions with torch.no_grad(): diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index b9a3ca57..42114ad0 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -1,6 +1,7 @@ from typing import Union, List -from tqdm import tqdm +import copy +import tqdm import torch @@ -10,26 +11,35 @@ from . import Trainer +SEQUENTIAL_TRAINER_DEFAULT_CONFIG = { + "timesteps": 100000, # number of timesteps to train for + "headless": False, # whether to use headless mode (no rendering) +} + + class SequentialTrainer(Trainer): def __init__(self, - cfg: dict, env: Wrapper, - agents: Union[Agent, List[Agent], List[List[Agent]]], - agents_scope : List[int] = []) -> None: + agents: Union[Agent, List[Agent]], + agents_scope : List[int] = [], + cfg: dict = {}) -> None: """Sequential trainer Train agents sequentially (i.e., one after the other in each interaction with the environment) - :param cfg: Configuration dictionary - :type cfg: dict :param env: Environment to train on :type env: skrl.env.torch.Wrapper :param agents: Agents to train :type agents: Union[Agent, List[Agent]] :param agents_scope: Number of environments for each agent to train on (default: []) :type agents_scope: tuple or list of integers + :param cfg: Configuration dictionary (default: {}). 
+ See SEQUENTIAL_TRAINER_DEFAULT_CONFIG for default values + :type cfg: dict, optional """ - super().__init__(cfg, env, agents, agents_scope) + _cfg = copy.deepcopy(SEQUENTIAL_TRAINER_DEFAULT_CONFIG) + _cfg.update(cfg) + super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) # init agents if self.num_agents > 1: @@ -59,7 +69,7 @@ def train(self) -> None: # reset env states = self.env.reset() - for timestep in tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): # pre-interaction for agent in self.agents: @@ -124,7 +134,7 @@ def eval(self) -> None: # reset env states = self.env.reset() - for timestep in tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): # compute actions with torch.no_grad(): From 5596883c42ef5edf062b45822d6aa1e4c639140b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 31 Jul 2022 23:11:05 +0200 Subject: [PATCH 030/108] Add manual trainer to docs --- docs/source/index.rst | 2 + docs/source/modules/skrl.trainers.manual.rst | 44 ++++++++++++++++++ .../source/modules/skrl.trainers.parallel.rst | 10 +++++ .../modules/skrl.trainers.sequential.rst | 10 +++++ docs/source/snippets/trainer.py | 45 ++++++++++++++++--- 5 files changed, 104 insertions(+), 7 deletions(-) create mode 100644 docs/source/modules/skrl.trainers.manual.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index e26408f7..f9c96941 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -150,6 +150,7 @@ Trainers * :doc:`Sequential trainer ` * :doc:`Parallel trainer ` + * :doc:`Manual trainer ` .. toctree:: :maxdepth: 1 @@ -159,6 +160,7 @@ Trainers modules/skrl.trainers.base_class modules/skrl.trainers.sequential modules/skrl.trainers.parallel + modules/skrl.trainers.manual Resources ^^^^^^^^^ diff --git a/docs/source/modules/skrl.trainers.manual.rst b/docs/source/modules/skrl.trainers.manual.rst new file mode 100644 index 00000000..61c43c86 --- /dev/null +++ b/docs/source/modules/skrl.trainers.manual.rst @@ -0,0 +1,44 @@ +Manual trainer +============== + +Concept +^^^^^^^ + +.. image:: ../_static/imgs/manual_trainer.svg + :width: 100% + :align: center + :alt: Manual trainer + +Basic usage +^^^^^^^^^^^ + +.. tabs:: + + .. tab:: Snippet + + .. literalinclude:: ../snippets/trainer.py + :language: python + :linenos: + :start-after: [start-manual] + :end-before: [end-manual] + +Configuration +^^^^^^^^^^^^^ + +.. py:data:: skrl.trainers.torch.manual.MANUAL_TRAINER_DEFAULT_CONFIG + +.. literalinclude:: ../../../skrl/trainers/torch/manual.py + :language: python + :lines: 14-17 + :linenos: + +API +^^^ + +.. autoclass:: skrl.trainers.torch.manual.ManualTrainer + :undoc-members: + :show-inheritance: + :inherited-members: + :members: + + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.trainers.parallel.rst b/docs/source/modules/skrl.trainers.parallel.rst index 1e8a0f46..4e3751e3 100644 --- a/docs/source/modules/skrl.trainers.parallel.rst +++ b/docs/source/modules/skrl.trainers.parallel.rst @@ -30,6 +30,16 @@ Basic usage :start-after: [start-parallel] :end-before: [end-parallel] +Configuration +^^^^^^^^^^^^^ + +.. py:data:: skrl.trainers.torch.parallel.PARALLEL_TRAINER_DEFAULT_CONFIG + +.. 
literalinclude:: ../../../skrl/trainers/torch/parallel.py + :language: python + :lines: 15-18 + :linenos: + API ^^^ diff --git a/docs/source/modules/skrl.trainers.sequential.rst b/docs/source/modules/skrl.trainers.sequential.rst index 14c23b1b..9866a420 100644 --- a/docs/source/modules/skrl.trainers.sequential.rst +++ b/docs/source/modules/skrl.trainers.sequential.rst @@ -22,6 +22,16 @@ Basic usage :start-after: [start-sequential] :end-before: [end-sequential] +Configuration +^^^^^^^^^^^^^ + +.. py:data:: skrl.trainers.torch.sequential.SEQUENTIAL_TRAINER_DEFAULT_CONFIG + +.. literalinclude:: ../../../skrl/trainers/torch/sequential.py + :language: python + :lines: 14-17 + :linenos: + API ^^^ diff --git a/docs/source/snippets/trainer.py b/docs/source/snippets/trainer.py index 2ff4c9e4..2e54ad44 100644 --- a/docs/source/snippets/trainer.py +++ b/docs/source/snippets/trainer.py @@ -1,29 +1,39 @@ # [start-base] from typing import Union, List +import copy + from skrl.envs.torch import Wrapper # from ...envs.torch import Wrapper from skrl.agents.torch import Agent # from ...agents.torch import Agent from skrl.trainers.torch import Trainer # from . import Trainer +CUSTOM_DEFAULT_CONFIG = { + "timesteps": 100000, # number of timesteps to train for + "headless": False, # whether to use headless mode (no rendering) +} + + class CustomTrainer(Trainer): def __init__(self, - cfg: dict, env: Wrapper, agents: Union[Agent, List[Agent], List[List[Agent]]], - agents_scope : List[int] = []) -> None: + agents_scope : List[int] = [], + cfg: dict = {}) -> None: """ - :param cfg: Configuration dictionary - :type cfg: dict :param env: Environment to train on :type env: skrl.env.torch.Wrapper :param agents: Agents to train :type agents: Union[Agent, List[Agent]] :param agents_scope: Number of environments for each agent to train on (default: []) :type agents_scope: tuple or list of integers + :param cfg: Configuration dictionary + :type cfg: dict, optional """ - super().__init__(cfg, env, agents, agents_scope) + _cfg = copy.deepcopy(CUSTOM_DEFAULT_CONFIG) + _cfg.update(cfg) + super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) # ================================ # - init agents @@ -66,7 +76,7 @@ def eval(self) -> None: # create a sequential trainer cfg = {"timesteps": 50000, "headless": False} -trainer = SequentialTrainer(cfg=cfg, env=env, agents=agents) +trainer = SequentialTrainer(env=env, agents=agents, cfg=cfg) # train the agent(s) trainer.train() @@ -85,7 +95,7 @@ def eval(self) -> None: # create a sequential trainer cfg = {"timesteps": 50000, "headless": False} -trainer = ParallelTrainer(cfg=cfg, env=env, agents=agents) +trainer = ParallelTrainer(env=env, agents=agents, cfg=cfg) # train the agent(s) trainer.train() @@ -93,3 +103,24 @@ def eval(self) -> None: # evaluate the agent(s) trainer.eval() # [end-parallel] + +# ============================================================================= + +# [start-manual] +from skrl.trainers.torch import ManualTrainer + +# asuming there is an environment called 'env' +# and an agent or a list of agents called 'agents' + +# create a sequential trainer +cfg = {"timesteps": 50000, "headless": False} +trainer = ManualTrainer(env=env, agents=agents, cfg=cfg) + +# train the agent(s) +for timestep in range(cfg["timesteps"]): + trainer.train(timestep=timestep) + +# evaluate the agent(s) +for timestep in range(cfg["timesteps"]): + trainer.eval(timestep=timestep) +# [end-manual] \ No newline at end of file From 3cf64d83e72703c80aeda06fadc483cea5fea4cc 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 31 Jul 2022 23:13:15 +0200 Subject: [PATCH 031/108] Update images --- docs/source/_static/imgs/manual_trainer.svg | 1 + docs/source/_static/imgs/rl_schema.svg | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100755 docs/source/_static/imgs/manual_trainer.svg diff --git a/docs/source/_static/imgs/manual_trainer.svg b/docs/source/_static/imgs/manual_trainer.svg new file mode 100755 index 00000000..460497a3 --- /dev/null +++ b/docs/source/_static/imgs/manual_trainer.svg @@ -0,0 +1 @@ +step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.act(…)record transitionscompute actions. . .. . ...............................Execute each agent method sequentially (one agent after the other) in the same processscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏A1A2A3. . .Amagentenvagent.post_interaction(…)agent.pre_interaction(…)post-interactionpre-interaction𝒔𝒕resetenv.reset(…)training / evaluationiteration \ No newline at end of file diff --git a/docs/source/_static/imgs/rl_schema.svg b/docs/source/_static/imgs/rl_schema.svg index ca892130..aac7fe7a 100755 --- a/docs/source/_static/imgs/rl_schema.svg +++ b/docs/source/_static/imgs/rl_schema.svg @@ -1 +1 @@ -action (𝒂𝒕)state (𝒔𝒕+𝟏)reward (𝒓𝒕+𝟏)state (𝒔𝒕)reward (𝒓𝒕)agentenv1623457 \ No newline at end of file +action (𝒂𝒕)state (𝒔𝒕+𝟏)reward (𝒓𝒕+𝟏)state (𝒔𝒕)reward (𝒓𝒕)agentenv17234586 \ No newline at end of file From d7e10a2d6f779ba73fe842b0e32a49e950c9b911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 1 Aug 2022 00:11:45 +0200 Subject: [PATCH 032/108] Update Tensorboard log table in docs --- docs/source/intro/data.rst | 94 +++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/docs/source/intro/data.rst b/docs/source/intro/data.rst index f5e358c4..9deef70b 100644 --- a/docs/source/intro/data.rst +++ b/docs/source/intro/data.rst @@ -51,51 +51,53 @@ To visualize the tracked metrics/scales, during or after the training, TensorBoa The following table shows the metrics/scales tracked by each agent ([**+**] all the time, [**-**] only when such a function is enabled in the agent's configuration): -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -|Tag |Metric / Scalar |.. centered:: CEM |.. centered:: DDPG|.. centered:: DQN |.. centered:: DDQN|.. centered:: PPO |.. centered:: Q-learning |.. centered:: SAC |.. centered:: SARSA |.. centered:: TD3 |.. centered:: TRPO| -+===========+====================+==================+==================+==================+==================+==================+=========================+==================+====================+==================+==================+ -|Coefficient|Entropy coefficient | | | | | | |.. centered:: + | | | | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Return threshold |.. 
centered:: + | | | | | | | | | | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Mean disc. returns |.. centered:: + | | | | | | | | | | -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -|Episode |Total timesteps |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + | -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -|Exploration|Exploration noise | |.. centered:: + | | | | | | |.. centered:: + | | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Exploration epsilon | | |.. centered:: + |.. centered:: + | | | | | | | -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -|Learning |Learning rate |.. centered:: -- | |.. centered:: -- |.. centered:: -- |.. centered:: -- | | | | | | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Policy learning rate| |.. centered:: -- | | | | |.. centered:: -- | |.. centered:: -- | | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Critic learning rate| |.. centered:: -- | | | | |.. centered:: -- | |.. centered:: -- | | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Return threshold | | | | | | | | | |.. centered:: -- | -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -|Loss |Policy loss |.. centered:: + |.. centered:: + | | |.. centered:: + | |.. centered:: + | |.. centered:: + |.. centered:: + | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Critic loss | |.. centered:: + | | | | |.. centered:: + | |.. 
centered:: + | | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Value loss | | | | |.. centered:: + | | | | |.. centered:: + | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Entropy loss | | | | |.. centered:: -- | |.. centered:: -- | | | | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Q-network loss | | |.. centered:: + |.. centered:: + | | | | | | | -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -|Policy |Standard deviation | | | | |.. centered:: + | | | | |.. centered:: + | -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -|Q-network |Q1 | |.. centered:: + | | | | |.. centered:: + | |.. centered:: + | | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Q2 | | | | | | |.. centered:: + | |.. centered:: + | | -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -|Reward |Instantaneous reward|.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + | -+ +--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -| |Total reward |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + | -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ -|Target |Target | |.. centered:: + |.. centered:: + |.. centered:: + | | |.. centered:: + | |.. centered:: + | | -+-----------+--------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +|Tag |Metric / Scalar |.. 
centered:: A2C |.. centered:: AMP |.. centered:: CEM |.. centered:: DDPG|.. centered:: DDQN|.. centered:: DQN |.. centered:: PPO |.. centered:: Q-learning |.. centered:: SAC |.. centered:: SARSA |.. centered:: TD3 |.. centered:: TRPO| ++===========+====================+==================+==================+==================+==================+==================+==================+==================+=========================+==================+====================+==================+==================+ +|Coefficient|Entropy coefficient | | | | | | | | |.. centered:: + | | | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Return threshold | | |.. centered:: + | | | | | | | | | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Mean disc. returns | | |.. centered:: + | | | | | | | | | | ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +|Episode |Total timesteps |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + | ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +|Exploration|Exploration noise | | | |.. centered:: + | | | | | | |.. centered:: + | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Exploration epsilon | | | | |.. centered:: + |.. centered:: + | | | | | | | ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +|Learning |Learning rate |.. centered:: + |.. centered:: + |.. centered:: -- | |.. centered:: -- |.. centered:: -- |.. centered:: -- | | | | | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Policy learning rate| | | |.. centered:: -- | | | | |.. centered:: -- | |.. centered:: -- | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Critic learning rate| | | |.. centered:: -- | | | | |.. 
centered:: -- | |.. centered:: -- | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Return threshold | | | | | | | | | | | |.. centered:: -- | ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +|Loss |Critic loss | | | |.. centered:: + | | | | |.. centered:: + | |.. centered:: + | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Entropy loss |.. centered:: -- |.. centered:: -- | | | | |.. centered:: -- | |.. centered:: -- | | | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Discriminator loss | |.. centered:: + | | | | | | | | | | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Policy loss |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + | | |.. centered:: + | |.. centered:: + | |.. centered:: + |.. centered:: + | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Q-network loss | | | | |.. centered:: + |.. centered:: + | | | | | | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Value loss |.. centered:: + |.. centered:: + | | | | |.. centered:: + | | | | |.. centered:: + | ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +|Policy |Standard deviation |.. centered:: + |.. centered:: + | | | | |.. centered:: + | | | | |.. centered:: + | ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +|Q-network |Q1 | | | |.. centered:: + | | | | |.. centered:: + | |.. 
centered:: + | | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Q2 | | | | | | | | |.. centered:: + | |.. centered:: + | | ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +|Reward |Instantaneous reward|.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + | ++ +--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +| |Total reward |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + |.. centered:: + | ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ +|Target |Target | | | |.. centered:: + |.. centered:: + |.. centered:: + | | |.. centered:: + | |.. centered:: + | | ++-----------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+-------------------------+------------------+--------------------+------------------+------------------+ Tracking custom metrics/scales ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -163,7 +165,7 @@ The best models are updated internally on each TensorBoard writing interval :lit Loading checkpoints ^^^^^^^^^^^^^^^^^^^ -Checkpoints can be loaded for each of the instantiated models independently via the :literal:`.load(...)` method (`Model.load <../modules/skrl.models.base_class.html#skrl.models.torch.base.Model.load>`_). It accepts the path (relative or absolute) of the checkpoint to load as the only argument +Checkpoints can be loaded for each of the instantiated models independently via the :literal:`.load(...)` method (`Model.load <../modules/skrl.models.base_class.html#skrl.models.torch.base.Model.load>`_). It accepts the path (relative or absolute) of the checkpoint to load as the only argument. The checkpoint will be dynamically mapped to the device specified as argument in the class constructor (internally the torch load's :literal:`map_location` method is used during loading) .. note:: From 971c486d175798f95cc45c59d2aa33855bb3e690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 1 Aug 2022 00:14:08 +0200 Subject: [PATCH 033/108] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd35cd6e..b48c1b53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
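The checkpoint-loading behaviour described above (remapping the stored tensors to the device passed to the model's constructor) can be reproduced with plain PyTorch. A minimal sketch, assuming a hypothetical checkpoint path::

    import torch
    import torch.nn as nn

    # toy model saved on one machine and restored on another
    model = nn.Linear(4, 2)
    torch.save(model.state_dict(), "/tmp/checkpoint.pt")

    # torch.load's map_location remaps the stored tensors to the target device,
    # which is what Model.load does internally with the constructor's device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.load_state_dict(torch.load("/tmp/checkpoint.pt", map_location=device))
    model.eval()  # loaded models are set to evaluation mode, as Model.load does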
### Added - AMP agent for physics-based character animation - Gaussian model +- Manual trainer ### Changed - Multivariate Gaussian model (`GaussianModel` until 0.7.0) to `MultivariateGaussianModel` +- Trainer's `cfg` parameter position and default values ## [0.7.0] - 2022-07-11 ### Added From ae81fd25ef659497b7a6db4ab5a91d0c9bc36c7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 1 Aug 2022 00:26:50 +0200 Subject: [PATCH 034/108] Add tqdm to sphinx requirements --- docs/requirements.txt | 1 + docs/source/intro/installation.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 89459e2b..65315ffa 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,3 +5,4 @@ sphinx-tabs==3.2.0 gym torch tensorboard +tqdm diff --git a/docs/source/intro/installation.rst b/docs/source/intro/installation.rst index 1663cccb..3c96b006 100644 --- a/docs/source/intro/installation.rst +++ b/docs/source/intro/installation.rst @@ -11,6 +11,7 @@ Prerequisites **skrl** requires Python 3.6 or higher and the following libraries (they will be installed automatically): * `gym `_ + * `tqdm `_ * `torch `_ 1.8.0 or higher * `tensorboard `_ From 00647f1c08357ffb4a341d233ef14ef1860763d3 Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Mon, 15 Aug 2022 09:46:44 +0200 Subject: [PATCH 035/108] Support new step api from gym 0.25.0 --- skrl/envs/torch/wrappers.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index 62110e02..c4dfb43c 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -89,7 +89,7 @@ def num_envs(self) -> int: def state_space(self) -> gym.Space: """State space - If the wrapped environment does not have the ``state_space`` property, + If the wrapped environment does not have the ``state_space`` property, the value of the ``observation_space`` property will be used """ return self._env.state_space if hasattr(self._env, "state_space") else self._env.observation_space @@ -115,7 +115,7 @@ def __init__(self, env: Any) -> None: :type env: Any supported Isaac Gym environment (preview 2) environment """ super().__init__(env) - + self._reset_once = True self._obs_buf = None @@ -192,7 +192,7 @@ def render(self, *args, **kwargs) -> None: """Render the environment """ pass - + def close(self) -> None: """Close the environment """ @@ -248,7 +248,7 @@ def render(self, *args, **kwargs) -> None: """Render the environment """ pass - + def close(self) -> None: """Close the environment """ @@ -271,6 +271,11 @@ def __init__(self, env: Any) -> None: except Exception as e: print("[WARNING] Failed to check for a vectorized environment: {}".format(e)) + if hasattr(self, "new_step_api"): + self._new_step_api = self._env.new_step_api + else: + self._new_step_api = False + @property def state_space(self) -> gym.Space: """State space @@ -359,13 +364,17 @@ def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch :return: The state, the reward, the done flag, and the info :rtype: tuple of torch.Tensor and any other info """ - observation, reward, done, info = self._env.step(self._tensor_to_action(actions)) + if self._new_step_api: + observation, reward, termination, truncation, info = self._env.step(self._tensor_to_action(actions)) + done = termination or truncation + else: + observation, reward, done, info = self._env.step(self._tensor_to_action(actions)) # convert response to 
torch return self._observation_to_tensor(observation), \ torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ torch.tensor(done, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ info - + def reset(self) -> torch.Tensor: """Reset the environment @@ -508,7 +517,7 @@ def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch reward = timestep.reward if timestep.reward is not None else 0 done = timestep.last() info = {} - + # convert response to torch return self._observation_to_tensor(observation), \ torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ @@ -562,7 +571,7 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: .. raw:: html
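The gym 0.25.0 support added in the wrapper patch above boils down to collapsing the new 5-tuple step return into the old 4-tuple one. A standalone sketch of that idea (the helper name is illustrative, not part of skrl)::

    import gym
    import torch

    def normalize_step_output(result, device="cpu"):
        # gym >= 0.25.0 with new_step_api=True returns
        # (observation, reward, termination, truncation, info);
        # older versions return (observation, reward, done, info)
        if len(result) == 5:
            observation, reward, termination, truncation, info = result
            done = termination or truncation
        else:
            observation, reward, done, info = result
        # convert the scalars to torch tensors with a batch dimension, as the wrapper does
        return observation, \
               torch.tensor(reward, device=device, dtype=torch.float32).view(1, -1), \
               torch.tensor(done, device=device, dtype=torch.bool).view(1, -1), \
               info

    env = gym.make("CartPole-v1")
    env.reset()
    observation, reward, done, info = normalize_step_output(env.step(env.action_space.sample()))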
- + +--------------------+-------------------------+ |Environment |Wrapper tag | +====================+=========================+ @@ -581,9 +590,9 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: :type wrapper: str, optional :param verbose: Whether to print the wrapper type (default: True) :type verbose: bool, optional - + :raises ValueError: Unknow wrapper type - + :return: Wrapped environment :rtype: Wrapper """ From a0b8300dddaa612ceee1c5fe16e16102cfeb3723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 17 Aug 2022 23:12:16 +0200 Subject: [PATCH 036/108] Migrate external model's state dict to current model --- skrl/models/torch/base.py | 91 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index ad810ce0..9d36f45b 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -1,6 +1,7 @@ -from typing import Union, Tuple +from typing import Optional, Union, Mapping, Tuple import gym +import collections import numpy as np import torch @@ -304,6 +305,94 @@ def load(self, path: str) -> None: """ self.load_state_dict(torch.load(path, map_location=self.device)) self.eval() + + def migrate(self, + state_dict: Mapping[str, torch.Tensor], + name_map: Optional[Mapping[str, str]] = {}, + auto_mapping: Optional[bool] = True, + show_names: Optional[bool] = False) -> bool: + """Migrate the specified extrernal model's state dict to the current model + + :param state_dict: External model's state dict to migrate from + :type state_dict: Mapping[str, torch.Tensor] + :param name_map: Name map to use for the migration (default: {}). + Keys are the current parameter names and values are the external parameter names + :type name_map: Mapping[str, str], optional + :param auto_mapping: Automatically map the external state dict to the current state dict (default: True) + :type auto_mapping: bool, optional + :param show_names: Show the names of both, current and external state dicts parameters (default: False) + :type show_names: bool, optional + + :return: True if the migration was successful, False otherwise. 
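To make the auto-mapping behaviour concrete, here is a reduced, plain-PyTorch sketch of matching parameters by shape and name suffix between two state dicts. It is not the library implementation, which additionally honours an explicit name map and reports ambiguous, missing and duplicated matches::

    import torch.nn as nn

    # two structurally identical networks whose parameter names differ
    current = nn.Sequential(nn.Linear(4, 32), nn.ELU(), nn.Linear(32, 2))
    external = nn.Sequential()
    external.add_module("fc1", nn.Linear(4, 32))
    external.add_module("fc2", nn.Linear(32, 2))

    new_state_dict = {}
    for name, tensor in current.state_dict().items():
        for external_name, external_tensor in external.state_dict().items():
            # match by shape and by parameter suffix (.weight / .bias)
            if tensor.shape == external_tensor.shape and \
               name.split(".")[-1] == external_name.split(".")[-1]:
                new_state_dict[name] = external_tensor
                break

    current.load_state_dict(new_state_dict, strict=False)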
+ Migration is successful if all parameters of the current model are found in the external model + :rtype: bool + """ + # Show state_dict + if show_names: + print("Model migration") + print("Current state_dict:") + for name, tensor in self.state_dict().items(): + print(" |-- {} : {}".format(name, tensor.shape)) + print("Source state_dict:") + for name, tensor in state_dict.items(): + print(" |-- {} : {}".format(name, tensor.shape)) + + # migrate the state dict to current model + new_state_dict = collections.OrderedDict() + match_counter = collections.defaultdict(list) + used_counter = collections.defaultdict(list) + for name, tensor in self.state_dict().items(): + for external_name, external_tensor in state_dict.items(): + # mapped names + if name_map.get(name, "") == external_name: + if tensor.shape == external_tensor.shape: + new_state_dict[name] = external_tensor + match_counter[name].append(external_name) + used_counter[external_name].append(name) + break + else: + print("Shape mismatch for {} <- {} : {} != {}".format(name, external_name, tensor.shape, external_tensor.shape)) + # auto-mapped names + if auto_mapping: + if tensor.shape == external_tensor.shape: + if name.endswith(".weight"): + if external_name.endswith(".weight"): + new_state_dict[name] = external_tensor + match_counter[name].append(external_name) + used_counter[external_name].append(name) + elif name.endswith(".bias"): + if external_name.endswith(".bias"): + new_state_dict[name] = external_tensor + match_counter[name].append(external_name) + used_counter[external_name].append(name) + else: + if not external_name.endswith(".weight") and not external_name.endswith(".bias"): + new_state_dict[name] = external_tensor + match_counter[name].append(external_name) + used_counter[external_name].append(name) + + # show ambiguous matches + status = True + for name, tensor in self.state_dict().items(): + if len(match_counter.get(name, [])) > 1: + print("Ambiguous match for {} <- {}".format(name, match_counter.get(name, []))) + status = False + # show missing matches + for name, tensor in self.state_dict().items(): + if not match_counter.get(name, []): + print("Missing match for {}".format(name)) + status = False + # show duplicated uses + for name, tensor in state_dict.items(): + if len(used_counter.get(name, [])) > 1: + print("Duplicated use of {} -> {}".format(name, used_counter.get(name, []))) + status = False + + # load new state dict + self.load_state_dict(new_state_dict, strict=False) + self.eval() + + return status def freeze_parameters(self, freeze: bool = True) -> None: """Freeze or unfreeze internal parameters From 1a4d14aa00908c10957e16aefe790b225d84e330 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 20 Aug 2022 23:11:35 +0200 Subject: [PATCH 037/108] Add role parameter to models' act and compute methods --- skrl/models/torch/base.py | 10 ++++++++-- skrl/models/torch/categorical.py | 8 ++++++-- skrl/models/torch/deterministic.py | 8 ++++++-- skrl/models/torch/gaussian.py | 8 ++++++-- skrl/models/torch/multivariate_gaussian.py | 8 ++++++-- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 9d36f45b..87d3b282 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -227,7 +227,8 @@ def forward(self): def compute(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None) -> Union[torch.Tensor, Tuple[torch.Tensor]]: + taken_actions: Union[torch.Tensor, None] = None, + role: str = 
"") -> Union[torch.Tensor, Tuple[torch.Tensor]]: """Define the computation performed (to be implemented by the inheriting classes) by the models :param states: Observation/state of the environment used to make the decision @@ -235,6 +236,8 @@ def compute(self, :param taken_actions: Actions taken by a policy to the given states (default: None). The use of these actions only makes sense in critical models, e.g. :type taken_actions: torch.Tensor or None, optional + :param role: Role of the agent (default: "") + :type role: str, optional :raises NotImplementedError: Child class must implement this method @@ -246,7 +249,8 @@ def compute(self, def act(self, states: torch.Tensor, taken_actions: Union[torch.Tensor, None] = None, - inference=False) -> Tuple[torch.Tensor]: + inference=False, + role: str = "") -> Tuple[torch.Tensor]: """Act according to the specified behavior (to be implemented by the inheriting classes) Agents will call this method to obtain the decision to be taken given the state of the environment. @@ -260,6 +264,8 @@ def act(self, :type taken_actions: torch.Tensor or None, optional :param inference: Flag to indicate whether the model is making inference (default: False) :type inference: bool, optional + :param role: Role of the agent (default: "") + :type role: str, optional :raises NotImplementedError: Child class must implement this method diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index 2e099187..19aa2594 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -39,7 +39,8 @@ def __init__(self, def act(self, states: torch.Tensor, taken_actions: Union[torch.Tensor, None] = None, - inference=False) -> Tuple[torch.Tensor]: + inference=False, + role: str = "") -> Tuple[torch.Tensor]: """Act stochastically in response to the state of the environment :param states: Observation/state of the environment used to make the decision @@ -50,6 +51,8 @@ def act(self, :param inference: Flag to indicate whether the model is making inference (default: False). If True, the returned tensors will be detached from the current graph :type inference: bool, optional + :param role: Role of the agent (default: "") + :type role: str, optional :return: Action to be taken by the agent given the state of the environment. 
The tuple's components are the actions, the log of the probability density function and the model's output @@ -58,7 +61,8 @@ def act(self, # map from states/observations to normalized probabilities or unnormalized log probabilities if self._instantiator_net is None: output = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + taken_actions.to(self.device) if taken_actions is not None else taken_actions, + role) else: output = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index d96e6f64..7ee8c63b 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -40,7 +40,8 @@ def __init__(self, def act(self, states: torch.Tensor, taken_actions: Union[torch.Tensor, None] = None, - inference=False) -> Tuple[torch.Tensor]: + inference=False, + role: str = "") -> Tuple[torch.Tensor]: """Act deterministically in response to the state of the environment :param states: Observation/state of the environment used to make the decision @@ -51,6 +52,8 @@ def act(self, :param inference: Flag to indicate whether the model is making inference (default: False). If True, the returned tensors will be detached from the current graph :type inference: bool, optional + :param role: Role of the agent (default: "") + :type role: str, optional :return: Action to be taken by the agent given the state of the environment. The tuple's components are the computed actions and None for the last two components @@ -59,7 +62,8 @@ def act(self, # map from observations/states to actions if self._instantiator_net is None: actions = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + taken_actions.to(self.device) if taken_actions is not None else taken_actions, + role) else: actions = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index 24b117f0..dfcbc705 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -70,7 +70,8 @@ def __init__(self, def act(self, states: torch.Tensor, taken_actions: Union[torch.Tensor, None] = None, - inference=False) -> Tuple[torch.Tensor]: + inference=False, + role: str = "") -> Tuple[torch.Tensor]: """Act stochastically in response to the state of the environment :param states: Observation/state of the environment used to make the decision @@ -81,6 +82,8 @@ def act(self, :param inference: Flag to indicate whether the model is making inference (default: False). If True, the returned tensors will be detached from the current graph :type inference: bool, optional + :param role: Role of the agent (default: "") + :type role: str, optional :return: Action to be taken by the agent given the state of the environment. 
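As a reference for the Gaussian models touched above, this is roughly what happens between the network output and the returned actions: the log standard deviation is optionally clipped, a Normal distribution is built, and the log probability is reduced over the action dimensions. A plain-PyTorch sketch with the shapes used in the docstring examples (the sum over action dimensions matches the library's default reduction)::

    import torch
    from torch.distributions import Normal

    mean_actions = torch.randn(4096, 8)   # network output for a batch of states
    log_std = torch.zeros(8)              # learnable log standard deviation

    # clip the log standard deviation (what clip_log_std=True does)
    log_std = torch.clamp(log_std, min=-20, max=2)

    distribution = Normal(mean_actions, log_std.exp())
    actions = distribution.sample()                                       # (4096, 8)
    log_prob = distribution.log_prob(actions).sum(dim=-1, keepdim=True)   # (4096, 1)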
The tuple's components are the actions, the log of the probability density function and mean actions @@ -89,7 +92,8 @@ def act(self, # map from states/observations to mean actions and log standard deviations if self._instantiator_net is None: actions_mean, log_std = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + taken_actions.to(self.device) if taken_actions is not None else taken_actions, + role) else: actions_mean, log_std = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index 1f61f275..1b06f813 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -58,7 +58,8 @@ def __init__(self, def act(self, states: torch.Tensor, taken_actions: Union[torch.Tensor, None] = None, - inference=False) -> Tuple[torch.Tensor]: + inference=False, + role: str = "") -> Tuple[torch.Tensor]: """Act stochastically in response to the state of the environment :param states: Observation/state of the environment used to make the decision @@ -69,6 +70,8 @@ def act(self, :param inference: Flag to indicate whether the model is making inference (default: False). If True, the returned tensors will be detached from the current graph :type inference: bool, optional + :param role: Role of the agent (default: "") + :type role: str, optional :return: Action to be taken by the agent given the state of the environment. The tuple's components are the actions, the log of the probability density function and mean actions @@ -77,7 +80,8 @@ def act(self, # map from states/observations to mean actions and log standard deviations if self._instantiator_net is None: actions_mean, log_std = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + taken_actions.to(self.device) if taken_actions is not None else taken_actions, + role) else: actions_mean, log_std = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) From bb78edb11b397c6e43a47b2ecf6d09e41b7e6409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 21 Aug 2022 23:33:15 +0200 Subject: [PATCH 038/108] Improve model annotations and docstrings --- skrl/models/torch/base.py | 246 +++++++++++++++------ skrl/models/torch/categorical.py | 89 ++++++-- skrl/models/torch/deterministic.py | 83 +++++-- skrl/models/torch/gaussian.py | 116 +++++++--- skrl/models/torch/multivariate_gaussian.py | 110 +++++++-- 5 files changed, 487 insertions(+), 157 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 87d3b282..a53bff52 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -1,4 +1,4 @@ -from typing import Optional, Union, Mapping, Tuple +from typing import Optional, Union, Mapping, Sequence import gym import collections @@ -9,29 +9,45 @@ class Model(torch.nn.Module): def __init__(self, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + observation_space: Union[int, Sequence[int], gym.Space], + action_space: Union[int, Sequence[int], gym.Space], device: Union[str, torch.device] = "cuda:0") -> None: """Base class representing a function approximator The following properties are 
defined: - ``device`` (torch.device): Device to be used for the computations - - ``observation_space`` (int, tuple or list of integers, gym.Space or None): Observation/state space - - ``action_space`` (int, tuple or list of integers, gym.Space or None): Action space - - ``num_observations`` (int or None): Number of elements in the observation/state space - - ``num_actions`` (int or None): Number of elements in the action space + - ``observation_space`` (int, sequence of int, gym.Space): Observation/state space + - ``action_space`` (int, sequence of int, gym.Space): Action space + - ``num_observations`` (int): Number of elements in the observation/state space + - ``num_actions`` (int): Number of elements in the action space - :param observation_space: Observation/state space or shape (default: None). - If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional - :param action_space: Action space or shape (default: None). - If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param observation_space: Observation/state space or shape. + The ``num_observations`` property will contain the size of that space + :type observation_space: int, sequence of int, gym.Space + :param action_space: Action space or shape. + The ``num_actions`` property will contain the size of that space + :type action_space: int, sequence of int, gym.Space + :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) :type device: str or torch.device, optional + + Custom models should override the ``act`` method:: + + import torch + from skrl.models.torch import Model + + class CustomModel(Model): + def __init__(self, observation_space, action_space, device="cuda:0"): + super().__init__(observation_space, action_space, device) + + self.layer_1 = nn.Linear(self.num_observations, 64) + self.layer_2 = nn.Linear(64, self.num_actions) + + def act(self, states, taken_actions=None, inference=False, role=""): + x = F.relu(self.layer_1(states)) + x = F.relu(self.layer_2(x)) + return x """ - # TODO: export to onnx (https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html) super(Model, self).__init__() self.device = torch.device(device) @@ -51,7 +67,7 @@ def __init__(self, def _get_instantiator_output(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None) -> Tuple[torch.Tensor]: + taken_actions: Optional[torch.Tensor] = None) -> Sequence[torch.Tensor]: """Get the output of the instantiator model Input shape depends on the instantiator (see skrl.utils.model_instantiator.Shape) as follows: @@ -62,11 +78,11 @@ def _get_instantiator_output(self, :param states: Observation/state of the environment used to make the decision :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None) + :param taken_actions: Actions taken by a policy to the given states (default: ``None``) :type taken_actions: torch.Tensor, optional :return: Output of the instantiator model - :rtype: tuple of torch.Tensor + :rtype: sequence of torch.Tensor """ if self._instantiator_input_type == 0: output = self._instantiator_net(states) @@ -82,16 +98,51 @@ def _get_instantiator_output(self, else: return output * 
self._instantiator_output_scale, self._instantiator_parameter - def _get_space_size(self, space: Union[int, Tuple[int], gym.Space]) -> int: + def _get_space_size(self, + space: Union[int, Sequence[int], gym.Space], + number_of_elements: bool = True) -> int: """Get the size (number of elements) of a space :param space: Space or shape from which to obtain the number of elements - :type space: int, tuple or list of integers, or gym.Space + :type space: int, sequence of int, or gym.Space + :param number_of_elements: Whether the number of elements occupied by the space is returned (default: ``True``). + If ``False``, the shape of the space is returned. It only affects Discrete spaces + :type number_of_elements: bool, optional :raises ValueError: If the space is not supported :return: Size of the space (number of elements) :rtype: int + + Example:: + + # from int + >>> model._get_space_size(2) + 2 + + # from sequence of int + >>> model._get_space_size([2, 3]) + 6 + + # Box space + >>> space = gym.spaces.Box(low=-1, high=1, shape=(2, 3)) + >>> model._get_space_size(space) + 6 + + # Discrete space + >>> space = gym.spaces.Discrete(4) + >>> model._get_space_size(space) + 4 + >>> model._get_space_size(space, number_of_elements=False) + 1 + + # Dict space + >>> space = gym.spaces.Dict({'a': gym.spaces.Box(low=-1, high=1, shape=(2, 3)), + ... 'b': gym.spaces.Discrete(4)}) + >>> model._get_space_size(space) + 10 + >>> model._get_space_size(space, number_of_elements=False) + 7 """ size = None if type(space) in [int, float]: @@ -100,16 +151,22 @@ def _get_space_size(self, space: Union[int, Tuple[int], gym.Space]) -> int: size = np.prod(space) elif issubclass(type(space), gym.Space): if issubclass(type(space), gym.spaces.Discrete): - size = space.n + if number_of_elements: + size = space.n + else: + size = 1 elif issubclass(type(space), gym.spaces.Box): size = np.prod(space.shape) elif issubclass(type(space), gym.spaces.Dict): - size = sum([self._get_space_size(space.spaces[key]) for key in space.spaces]) + size = sum([self._get_space_size(space.spaces[key], number_of_elements) for key in space.spaces]) if size is None: raise ValueError("Space type {} not supported".format(type(space))) return int(size) - def tensor_to_space(self, tensor: torch.Tensor, space: gym.Space, start: int = 0) -> Union[torch.Tensor, dict]: + def tensor_to_space(self, + tensor: torch.Tensor, + space: gym.Space, + start: int = 0) -> Union[torch.Tensor, dict]: """Map a flat tensor to a Gym space The mapping is done in the following way: @@ -119,17 +176,28 @@ def tensor_to_space(self, tensor: torch.Tensor, space: gym.Space, start: int = 0 keeping the first dimension (number of samples) as they are - Tensors belonging to Dict spaces are mapped into a dictionary with the same keys as the original space - :param tensor: Tensor to map + :param tensor: Tensor to map from :type tensor: torch.Tensor :param space: Space to map the tensor to :type space: gym.Space - :param start: Index of the first element of the tensor to map (default: 0) + :param start: Index of the first element of the tensor to map (default: ``0``) :type start: int, optional :raises ValueError: If the space is not supported :return: Mapped tensor or dictionary :rtype: torch.Tensor or dict + + Example:: + + >>> space = gym.spaces.Dict({'a': gym.spaces.Box(low=-1, high=1, shape=(2, 3)), + ... 
'b': gym.spaces.Discrete(4)}) + >>> tensor = torch.tensor([[-0.3, -0.2, -0.1, 0.1, 0.2, 0.3, 2]]) + >>> + >>> model.tensor_to_space(tensor, space) + {'a': tensor([[[-0.3000, -0.2000, -0.1000], + [ 0.1000, 0.2000, 0.3000]]]), + 'b': tensor([[2.]])} """ if issubclass(type(space), gym.spaces.Discrete): return tensor @@ -138,7 +206,7 @@ def tensor_to_space(self, tensor: torch.Tensor, space: gym.Space, start: int = 0 elif issubclass(type(space), gym.spaces.Dict): output = {} for k in sorted(space.keys()): - end = start + self._get_space_size(space[k]) + end = start + self._get_space_size(space[k], number_of_elements=False) output[k] = self.tensor_to_space(tensor[:, start:end], space[k], end) start = end return output @@ -146,22 +214,25 @@ def tensor_to_space(self, tensor: torch.Tensor, space: gym.Space, start: int = 0 def random_act(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None, - inference=False) -> Tuple[torch.Tensor]: + taken_actions: Optional[torch.Tensor] = None, + inference: bool = False, + role: str = "") -> Sequence[torch.Tensor]: """Act randomly according to the action space :param states: Observation/state of the environment used to get the shape of the action space :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None). + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor or None, optional - :param inference: Flag to indicate whether the model is making inference (default: False) + :type taken_actions: torch.Tensor, optional + :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional + :param role: Role of the model (default: ``""``) + :type role: str, optional :raises NotImplementedError: Unsupported action space :return: Random actions to be taken by the agent - :rtype: tuple of torch.Tensor + :rtype: sequence of torch.Tensor """ # discrete action space (Discrete) if issubclass(type(self.action_space), gym.spaces.Discrete): @@ -183,12 +254,20 @@ def init_parameters(self, method_name: str = "normal_", *args, **kwargs) -> None Method names are from the `torch.nn.init `_ module. Allowed method names are *uniform_*, *normal_*, *constant_*, etc. 
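The name-based dispatch used by ``init_parameters`` (and ``init_weights``) can also be expressed with ``getattr`` instead of ``exec``; a small sketch of the same idea, not the library code::

    import torch
    import torch.nn as nn

    model = nn.Sequential(nn.Linear(4, 32), nn.ELU(), nn.Linear(32, 2))

    def init_parameters_by_name(module, method_name="normal_", *args, **kwargs):
        # look up the initializer in torch.nn.init by name and apply it to every parameter
        method = getattr(torch.nn.init, method_name)
        for parameters in module.parameters():
            method(parameters, *args, **kwargs)

    init_parameters_by_name(model, "normal_", mean=0.0, std=0.1)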
- :param method_name: `torch.nn.init `_ method name (default: "normal\_") + :param method_name: `torch.nn.init `_ method name (default: ``"normal_"``) :type method_name: str, optional :param args: Positional arguments of the method to be called :type args: tuple, optional :param kwargs: Key-value arguments of the method to be called :type kwargs: dict, optional + + Example:: + + # initialize all parameters with an orthogonal distribution with a gain of 0.5 + >>> model.init_parameters("orthogonal_", gain=0.5) + + # initialize all parameters as a sparse matrix with a sparsity of 0.1 + >>> model.init_parameters("sparse_", sparsity=0.1) """ for parameters in self.parameters(): exec("torch.nn.init.{}(parameters, *args, **kwargs)".format(method_name)) @@ -202,12 +281,20 @@ def init_weights(self, method_name: str = "orthogonal_", *args, **kwargs) -> Non The following layers will be initialized: - torch.nn.Linear - :param method_name: `torch.nn.init `_ method name (default: "orthogonal\_") + :param method_name: `torch.nn.init `_ method name (default: ``"orthogonal_"``) :type method_name: str, optional :param args: Positional arguments of the method to be called :type args: tuple, optional :param kwargs: Key-value arguments of the method to be called :type kwargs: dict, optional + + Example:: + + # initialize all weights with uniform distribution in range [-0.1, 0.1] + >>> model.init_weights(method_name="uniform_", a=-0.1, b=0.1) + + # initialize all weights with normal distribution with mean 0 and standard deviation 0.25 + >>> model.init_weights(method_name="normal_", mean=0.0, std=0.25) """ def _update_weights(module, method_name, args, kwargs): for layer in module: @@ -227,30 +314,30 @@ def forward(self): def compute(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None, - role: str = "") -> Union[torch.Tensor, Tuple[torch.Tensor]]: + taken_actions: Optional[torch.Tensor] = None, + role: str = "") -> Union[torch.Tensor, Sequence[torch.Tensor]]: """Define the computation performed (to be implemented by the inheriting classes) by the models :param states: Observation/state of the environment used to make the decision :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None). + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor or None, optional - :param role: Role of the agent (default: "") + :type taken_actions: torch.Tensor, optional + :param role: Role of the model (default: ``""``) :type role: str, optional :raises NotImplementedError: Child class must implement this method :return: Computation performed by the models - :rtype: torch.Tensor or tuple of torch.Tensor + :rtype: torch.Tensor or sequence of torch.Tensor """ raise NotImplementedError("The computation performed by the models (.compute()) is not implemented") def act(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None, - inference=False, - role: str = "") -> Tuple[torch.Tensor]: + taken_actions: Optional[torch.Tensor] = None, + inference: bool = False, + role: str = "") -> Sequence[torch.Tensor]: """Act according to the specified behavior (to be implemented by the inheriting classes) Agents will call this method to obtain the decision to be taken given the state of the environment. 
@@ -259,31 +346,31 @@ def act(self, :param states: Observation/state of the environment used to make the decision :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None). + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor or None, optional - :param inference: Flag to indicate whether the model is making inference (default: False) + :type taken_actions: torch.Tensor, optional + :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the agent (default: "") + :param role: Role of the model (default: ``""``) :type role: str, optional :raises NotImplementedError: Child class must implement this method :return: Action to be taken by the agent given the state of the environment. - The typical tuple's components are the actions, the log of the probability density function and mean actions. + The typical sequence's components are the actions, the log of the probability density function and mean actions. Deterministic agents must ignore the last two components and return empty tensors or None for them - :rtype: tuple of torch.Tensor + :rtype: sequence of torch.Tensor """ raise NotImplementedError("The action to be taken by the agent (.act()) is not implemented") def set_mode(self, mode: str) -> None: """Set the model mode (training or evaluation) - :param mode: Mode: "train" for training or "eval" for evaluation. + :param mode: Mode: ``"train"`` for training or ``"eval"`` for evaluation. See `torch.nn.Module.train `_ :type mode: str - :raises ValueError: Mode must be ``"train"`` or ``"eval"`` + :raises ValueError: If the mode is not ``"train"`` or ``"eval"`` """ if mode == "train": self.train(True) @@ -292,41 +379,64 @@ def set_mode(self, mode: str) -> None: else: raise ValueError("Invalid mode. Use 'train' for training or 'eval' for evaluation") - def save(self, path: str, state_dict: Union[dict, None] = None) -> None: + def save(self, path: str, state_dict: Optional[dict] = None) -> None: """Save the model to the specified path :param path: Path to save the model to :type path: str - :param state_dict: State dictionary to save (default: None). + :param state_dict: State dictionary to save (default: ``None``). If None, the model's state_dict will be saved :type state_dict: dict, optional + + Example:: + + # save the current model to the specified path + >>> model.save("/tmp/model.pt") + + # save an older version of the model to the specified path + >>> old_state_dict = copy.deepcopy(model.state_dict()) + >>> # ... 
+ >>> model.save("/tmp/model.pt", old_state_dict) + """ torch.save(self.state_dict() if state_dict is None else state_dict, path) def load(self, path: str) -> None: """Load the model from the specified path - + + The final storage device is determined by the constructor of the model + :param path: Path to load the model from :type path: str + + Example:: + + # load the model onto the CPU + >>> model = Model(observation_space, action_space, device="cpu") + >>> model.load("model.pt") + + # load the model onto the GPU 1 + >>> model = Model(observation_space, action_space, device="cuda:1") + >>> model.load("model.pt") """ self.load_state_dict(torch.load(path, map_location=self.device)) self.eval() def migrate(self, state_dict: Mapping[str, torch.Tensor], - name_map: Optional[Mapping[str, str]] = {}, - auto_mapping: Optional[bool] = True, - show_names: Optional[bool] = False) -> bool: + name_map: Mapping[str, str] = {}, + auto_mapping: bool = True, + show_names: bool = False) -> bool: """Migrate the specified extrernal model's state dict to the current model :param state_dict: External model's state dict to migrate from :type state_dict: Mapping[str, torch.Tensor] - :param name_map: Name map to use for the migration (default: {}). + :param name_map: Name map to use for the migration (default: ``{}``). Keys are the current parameter names and values are the external parameter names :type name_map: Mapping[str, str], optional - :param auto_mapping: Automatically map the external state dict to the current state dict (default: True) + :param auto_mapping: Automatically map the external state dict to the current state dict (default: ``True``) :type auto_mapping: bool, optional - :param show_names: Show the names of both, current and external state dicts parameters (default: False) + :param show_names: Show the names of both, current and external state dicts parameters (default: ``False``) :type show_names: bool, optional :return: True if the migration was successful, False otherwise. @@ -406,7 +516,7 @@ def freeze_parameters(self, freeze: bool = True) -> None: - Freeze: disable gradient computation (``parameters.requires_grad = False``) - Unfreeze: enable gradient computation (``parameters.requires_grad = True``) - :param freeze: Freeze the internal parameters if True, otherwise unfreeze them + :param freeze: Freeze the internal parameters if True, otherwise unfreeze them (default: ``True``) :type freeze: bool, optional """ for parameters in self.parameters(): @@ -417,12 +527,20 @@ def update_parameters(self, model: torch.nn.Module, polyak: float = 1) -> None: - Hard update: :math:`\\theta = \\theta_{net}` - Soft (polyak averaging) update: :math:`\\theta = (1 - \\rho) \\theta + \\rho \\theta_{net}` - + :param model: Model used to update the internal parameters :type model: torch.nn.Module (skrl.models.torch.Model) - :param polyak: Polyak hyperparameter between 0 and 1 (usually close to 0). - A hard update is performed when its value is 1 (default) + :param polyak: Polyak hyperparameter between 0 and 1 (default: ``1``). 
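The soft (polyak averaging) update formula above corresponds to the following plain-PyTorch sketch; it is illustrative only, and the library method also covers the hard-update case::

    import torch
    import torch.nn as nn

    target, source = nn.Linear(4, 2), nn.Linear(4, 2)

    polyak = 0.005
    with torch.no_grad():
        for target_param, source_param in zip(target.parameters(), source.parameters()):
            # theta = (1 - rho) * theta + rho * theta_net
            target_param.data.mul_(1 - polyak)
            target_param.data.add_(polyak * source_param.data)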
+ A hard update is performed when its value is 1 :type polyak: float, optional + + Example:: + + # hard update (from source model) + >>> model.update_parameters(source_model) + + # soft update (from source model) + >>> model.update_parameters(source_model, polyak=0.005) """ with torch.no_grad(): # hard update diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index 19aa2594..ab74a029 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Optional, Union, Sequence import gym @@ -10,25 +10,60 @@ class CategoricalModel(Model): def __init__(self, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + observation_space: Union[int, Sequence[int], gym.Space], + action_space: Union[int, Sequence[int], gym.Space], device: Union[str, torch.device] = "cuda:0", unnormalized_log_prob: bool = True) -> None: """Categorical model (stochastic model) - :param observation_space: Observation/state space or shape (default: None). - If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional - :param action_space: Action space or shape (default: None). - If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param observation_space: Observation/state space or shape. + The ``num_observations`` property will contain the size of that space + :type observation_space: int, sequence of int, gym.Space + :param action_space: Action space or shape. + The ``num_actions`` property will contain the size of that space + :type action_space: int, sequence of int, gym.Space + :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) :type device: str or torch.device, optional - :param unnormalized_log_prob: Flag to indicate how to be interpreted the model's output (default: True). + :param unnormalized_log_prob: Flag to indicate how to be interpreted the model's output (default: ``True``). If True, the model's output is interpreted as unnormalized log probabilities (it can be any real number), otherwise as normalized probabilities (the output must be non-negative, finite and have a non-zero sum) :type unnormalized_log_prob: bool, optional + + Example:: + + # define the model + >>> import torch + >>> import torch.nn as nn + >>> from skrl.models.torch import CategoricalModel + >>> + >>> class Policy(CategoricalModel): + ... def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): + ... super().__init__(observation_space, action_space, device, unnormalized_log_prob) + ... + ... self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + ... nn.ELU(), + ... nn.Linear(32, 32), + ... nn.ELU(), + ... nn.Linear(32, self.num_actions)) + ... + ... def compute(self, states, taken_actions, role): + ... return self.net(states) + ... 
+ >>> # given an observation_space: gym.spaces.Box with shape (4,) + >>> # and an action_space: gym.spaces.Discrete with n = 2 + >>> model = Policy(observation_space, action_space) + >>> + >>> print(model) + Policy( + (net): Sequential( + (0): Linear(in_features=4, out_features=32, bias=True) + (1): ELU(alpha=1.0) + (2): Linear(in_features=32, out_features=32, bias=True) + (3): ELU(alpha=1.0) + (4): Linear(in_features=32, out_features=2, bias=True) + ) + ) """ super(CategoricalModel, self).__init__(observation_space, action_space, device) @@ -38,25 +73,31 @@ def __init__(self, def act(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None, - inference=False, - role: str = "") -> Tuple[torch.Tensor]: + taken_actions: Optional[torch.Tensor] = None, + inference: bool = False, + role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment :param states: Observation/state of the environment used to make the decision :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None). + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor or None, optional - :param inference: Flag to indicate whether the model is making inference (default: False). - If True, the returned tensors will be detached from the current graph + :type taken_actions: torch.Tensor, optional + :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the agent (default: "") + :param role: Role of the model (default: ``""``) :type role: str, optional :return: Action to be taken by the agent given the state of the environment. 
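The ``unnormalized_log_prob`` flag documented above maps directly onto how ``torch.distributions.Categorical`` is constructed; a minimal sketch independent of skrl::

    import torch
    from torch.distributions import Categorical

    net_output = torch.randn(4096, 2)  # raw network output for 2 discrete actions

    # unnormalized_log_prob=True: the output is treated as logits (any real number)
    distribution = Categorical(logits=net_output)

    # unnormalized_log_prob=False: the output must be non-negative, finite probabilities
    distribution = Categorical(probs=torch.softmax(net_output, dim=-1))

    actions = distribution.sample()            # (4096,)
    log_prob = distribution.log_prob(actions)  # (4096,)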
- The tuple's components are the actions, the log of the probability density function and the model's output - :rtype: tuple of torch.Tensor + The sequence's components are the actions, the log of the probability density function and the model's output + :rtype: sequence of torch.Tensor + + Example:: + + >>> # given a batch of sample states with shape (4096, 4) + >>> action, log_prob, net_output = model.act(states) + >>> print(action.shape, log_prob.shape, net_output.shape) + torch.Size([4096, 1]) torch.Size([4096, 1]) torch.Size([4096, 2]) """ # map from states/observations to normalized probabilities or unnormalized log probabilities if self._instantiator_net is None: @@ -87,5 +128,11 @@ def distribution(self) -> torch.distributions.Categorical: :return: Distribution of the model :rtype: torch.distributions.Categorical + + Example:: + + >>> distribution = model.distribution() + >>> print(distribution) + Categorical(probs: torch.Size([4096, 2]), logits: torch.Size([4096, 2])) """ return self._distribution \ No newline at end of file diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index 7ee8c63b..634ef791 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Optional, Union, Sequence import gym @@ -9,22 +9,57 @@ class DeterministicModel(Model): def __init__(self, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + observation_space: Union[int, Sequence[int], gym.Space], + action_space: Union[int, Sequence[int], gym.Space], device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False) -> None: """Deterministic model (deterministic model) - :param observation_space: Observation/state space or shape (default: None). - If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional - :param action_space: Action space or shape (default: None). - If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param observation_space: Observation/state space or shape. + The ``num_observations`` property will contain the size of that space + :type observation_space: int, sequence of int, gym.Space + :param action_space: Action space or shape. + The ``num_actions`` property will contain the size of that space + :type action_space: int, sequence of int, gym.Space + :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) :type device: str or torch.device, optional - :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: False) + :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: ``False``) :type clip_actions: bool, optional + + Example:: + + # define the model + >>> import torch + >>> import torch.nn as nn + >>> from skrl.models.torch import DeterministicModel + >>> + >>> class Value(DeterministicModel): + ... def __init__(self, observation_space, action_space, device, clip_actions=False): + ... super().__init__(observation_space, action_space, device, clip_actions) + ... + ... 
self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + ... nn.ELU(), + ... nn.Linear(32, 32), + ... nn.ELU(), + ... nn.Linear(32, 1)) + ... + ... def compute(self, states, taken_actions, role): + ... return self.net(states) + ... + >>> # given an observation_space: gym.spaces.Box with shape (60,) + >>> # and an action_space: gym.spaces.Box with shape (8,) + >>> model = Value(observation_space, action_space) + >>> + >>> print(model) + Value( + (net): Sequential( + (0): Linear(in_features=60, out_features=32, bias=True) + (1): ELU(alpha=1.0) + (2): Linear(in_features=32, out_features=32, bias=True) + (3): ELU(alpha=1.0) + (4): Linear(in_features=32, out_features=1, bias=True) + ) + ) """ super(DeterministicModel, self).__init__(observation_space, action_space, device) @@ -39,25 +74,31 @@ def __init__(self, def act(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None, - inference=False, - role: str = "") -> Tuple[torch.Tensor]: + taken_actions: Optional[torch.Tensor] = None, + inference: bool = False, + role: str = "") -> Sequence[torch.Tensor]: """Act deterministically in response to the state of the environment :param states: Observation/state of the environment used to make the decision :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None). + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor or None, optional - :param inference: Flag to indicate whether the model is making inference (default: False). - If True, the returned tensors will be detached from the current graph + :type taken_actions: torch.Tensor, optional + :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the agent (default: "") + :param role: Role of the model (default: ``""``) :type role: str, optional :return: Action to be taken by the agent given the state of the environment. - The tuple's components are the computed actions and None for the last two components - :rtype: tuple of torch.Tensor + The sequence's components are the computed actions and None for the last two components + :rtype: sequence of torch.Tensor + + Example:: + + >>> # given a batch of sample states with shape (4096, 60) + >>> output = model.act(states) + >>> print(output[0].shape, output[1], output[2]) + torch.Size([4096, 1]) None None """ # map from observations/states to actions if self._instantiator_net is None: diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index dfcbc705..f8cb3203 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Optional, Union, Sequence import gym @@ -10,8 +10,8 @@ class GaussianModel(Model): def __init__(self, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + observation_space: Union[int, Sequence[int], gym.Space], + action_space: Union[int, Sequence[int], gym.Space], device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, @@ -20,28 +20,66 @@ def __init__(self, reduction: str = "sum") -> None: """Gaussian model (stochastic model) - :param observation_space: Observation/state space or shape (default: None). 
- If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional - :param action_space: Action space or shape (default: None). - If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param observation_space: Observation/state space or shape. + The ``num_observations`` property will contain the size of that space + :type observation_space: int, sequence of int, gym.Space + :param action_space: Action space or shape. + The ``num_actions`` property will contain the size of that space + :type action_space: int, sequence of int, gym.Space + :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) :type device: str or torch.device, optional - :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: False) + :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: ``False``) :type clip_actions: bool, optional - :param clip_log_std: Flag to indicate whether the log standard deviations should be clipped (default: True) + :param clip_log_std: Flag to indicate whether the log standard deviations should be clipped (default: ``True``) :type clip_log_std: bool, optional - :param min_log_std: Minimum value of the log standard deviation if clip_log_std is True (default: -20) + :param min_log_std: Minimum value of the log standard deviation if ``clip_log_std`` is True (default: ``-20``) :type min_log_std: float, optional - :param max_log_std: Maximum value of the log standard deviation if clip_log_std is True (default: 2) + :param max_log_std: Maximum value of the log standard deviation if ``clip_log_std`` is True (default: ``2``) :type max_log_std: float, optional - :param reduction: Reduction method for returning the log probability density function: (default: "sum"). - Supported values are "mean", "sum", "prod" and "none". If "none", the log probability density - function is returned as a tensor of shape (num_samples, num_actions) instead of (num_samples, 1) + :param reduction: Reduction method for returning the log probability density function: (default: ``"sum"``). + Supported values are ``"mean"``, ``"sum"``, ``"prod"`` and ``"none"``. If "``none"``, the log probability density + function is returned as a tensor of shape ``(num_samples, num_actions)`` instead of ``(num_samples, 1)`` :type reduction: str, optional :raises ValueError: If the reduction method is not valid + + Example:: + + # define the model + >>> import torch + >>> import torch.nn as nn + >>> from skrl.models.torch import GaussianModel + >>> + >>> class Policy(GaussianModel): + ... def __init__(self, observation_space, action_space, device, clip_actions=False, + ... clip_log_std=True, min_log_std=-20, max_log_std=2): + ... super().__init__(observation_space, action_space, device, clip_actions, + ... clip_log_std, min_log_std, max_log_std) + ... + ... self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + ... nn.ELU(), + ... nn.Linear(32, 32), + ... nn.ELU(), + ... nn.Linear(32, self.num_actions)) + ... self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + ... + ... def compute(self, states, taken_actions, role): + ... return self.net(states), self.log_std_parameter + ... 
+ >>> # given an observation_space: gym.spaces.Box with shape (60,) + >>> # and an action_space: gym.spaces.Box with shape (8,) + >>> model = Policy(observation_space, action_space) + >>> + >>> print(model) + Policy( + (net): Sequential( + (0): Linear(in_features=60, out_features=32, bias=True) + (1): ELU(alpha=1.0) + (2): Linear(in_features=32, out_features=32, bias=True) + (3): ELU(alpha=1.0) + (4): Linear(in_features=32, out_features=8, bias=True) + ) + ) """ super(GaussianModel, self).__init__(observation_space, action_space, device) @@ -69,25 +107,31 @@ def __init__(self, def act(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None, - inference=False, - role: str = "") -> Tuple[torch.Tensor]: + taken_actions: Optional[torch.Tensor] = None, + inference: bool = False, + role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment :param states: Observation/state of the environment used to make the decision :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None). + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor or None, optional - :param inference: Flag to indicate whether the model is making inference (default: False). - If True, the returned tensors will be detached from the current graph + :type taken_actions: torch.Tensor, optional + :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the agent (default: "") + :param role: Role of the model (default: ``""``) :type role: str, optional :return: Action to be taken by the agent given the state of the environment. 
- The tuple's components are the actions, the log of the probability density function and mean actions - :rtype: tuple of torch.Tensor + The sequence's components are the actions, the log of the probability density function and mean actions + :rtype: sequence of torch.Tensor + + Example:: + + >>> # given a batch of sample states with shape (4096, 60) + >>> action, log_prob, mean_action = model.act(states) + >>> print(action.shape, log_prob.shape, mean_action.shape) + torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations if self._instantiator_net is None: @@ -134,6 +178,12 @@ def get_entropy(self) -> torch.Tensor: :return: Entropy of the model :rtype: torch.Tensor + + Example:: + + >>> entropy = model.get_entropy() + >>> print(entropy.shape) + torch.Size([4096, 8]) """ if self._distribution is None: return torch.tensor(0.0, device=self.device) @@ -144,6 +194,12 @@ def get_log_std(self) -> torch.Tensor: :return: Log standard deviation of the model :rtype: torch.Tensor + + Example:: + + >>> log_std = model.get_log_std() + >>> print(log_std.shape) + torch.Size([4096, 8]) """ return self._log_std.repeat(self._num_samples, 1) @@ -152,5 +208,11 @@ def distribution(self) -> torch.distributions.Normal: :return: Distribution of the model :rtype: torch.distributions.Normal + + Example:: + + >>> distribution = model.distribution() + >>> print(distribution) + Normal(loc: torch.Size([4096, 8]), scale: torch.Size([4096, 8])) """ return self._distribution diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index 1b06f813..18d2d963 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Optional, Union, Sequence import gym @@ -10,8 +10,8 @@ class MultivariateGaussianModel(Model): def __init__(self, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + observation_space: Union[int, Sequence[int], gym.Space], + action_space: Union[int, Sequence[int], gym.Space], device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, @@ -19,22 +19,60 @@ def __init__(self, max_log_std: float = 2) -> None: """Multivariate Gaussian model (stochastic model) - :param observation_space: Observation/state space or shape (default: None). - If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional - :param action_space: Action space or shape (default: None). - If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param observation_space: Observation/state space or shape. + The ``num_observations`` property will contain the size of that space + :type observation_space: int, sequence of int, gym.Space + :param action_space: Action space or shape. 
+ The ``num_actions`` property will contain the size of that space + :type action_space: int, sequence of int, gym.Space + :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) :type device: str or torch.device, optional - :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: False) + :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: ``False``) :type clip_actions: bool, optional - :param clip_log_std: Flag to indicate whether the log standard deviations should be clipped (default: True) + :param clip_log_std: Flag to indicate whether the log standard deviations should be clipped (default: ``True``) :type clip_log_std: bool, optional - :param min_log_std: Minimum value of the log standard deviation if clip_log_std is True (default: -20) + :param min_log_std: Minimum value of the log standard deviation if ``clip_log_std`` is True (default: ``-20``) :type min_log_std: float, optional - :param max_log_std: Maximum value of the log standard deviation if clip_log_std is True (default: 2) + :param max_log_std: Maximum value of the log standard deviation if ``clip_log_std`` is True (default: ``2``) :type max_log_std: float, optional + + Example:: + + # define the model + >>> import torch + >>> import torch.nn as nn + >>> from skrl.models.torch import MultivariateGaussianModel + >>> + >>> class Policy(MultivariateGaussianModel): + ... def __init__(self, observation_space, action_space, device, clip_actions=False, + ... clip_log_std=True, min_log_std=-20, max_log_std=2): + ... super().__init__(observation_space, action_space, device, clip_actions, + ... clip_log_std, min_log_std, max_log_std) + ... + ... self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + ... nn.ELU(), + ... nn.Linear(32, 32), + ... nn.ELU(), + ... nn.Linear(32, self.num_actions)) + ... self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + ... + ... def compute(self, states, taken_actions, role): + ... return self.net(states), self.log_std_parameter + ... + >>> # given an observation_space: gym.spaces.Box with shape (60,) + >>> # and an action_space: gym.spaces.Box with shape (8,) + >>> model = Policy(observation_space, action_space) + >>> + >>> print(model) + Policy( + (net): Sequential( + (0): Linear(in_features=60, out_features=32, bias=True) + (1): ELU(alpha=1.0) + (2): Linear(in_features=32, out_features=32, bias=True) + (3): ELU(alpha=1.0) + (4): Linear(in_features=32, out_features=8, bias=True) + ) + ) """ super(MultivariateGaussianModel, self).__init__(observation_space, action_space, device) @@ -57,25 +95,31 @@ def __init__(self, def act(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None, - inference=False, - role: str = "") -> Tuple[torch.Tensor]: + taken_actions: Optional[torch.Tensor] = None, + inference: bool = False, + role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment :param states: Observation/state of the environment used to make the decision :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None). + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor or None, optional - :param inference: Flag to indicate whether the model is making inference (default: False). 
- If True, the returned tensors will be detached from the current graph + :type taken_actions: torch.Tensor, optional + :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the agent (default: "") + :param role: Role of the model (default: ``""``) :type role: str, optional :return: Action to be taken by the agent given the state of the environment. - The tuple's components are the actions, the log of the probability density function and mean actions - :rtype: tuple of torch.Tensor + The sequence's components are the actions, the log of the probability density function and mean actions + :rtype: sequence of torch.Tensor + + Example:: + + >>> # given a batch of sample states with shape (4096, 60) + >>> action, log_prob, mean_action = model.act(states) + >>> print(action.shape, log_prob.shape, mean_action.shape) + torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations if self._instantiator_net is None: @@ -121,6 +165,12 @@ def get_entropy(self) -> torch.Tensor: :return: Entropy of the model :rtype: torch.Tensor + + Example:: + + >>> entropy = model.get_entropy() + >>> print(entropy.shape) + torch.Size([4096]) """ if self._distribution is None: return torch.tensor(0.0, device=self.device) @@ -131,6 +181,12 @@ def get_log_std(self) -> torch.Tensor: :return: Log standard deviation of the model :rtype: torch.Tensor + + Example:: + + >>> log_std = model.get_log_std() + >>> print(log_std.shape) + torch.Size([4096, 8]) """ return self._log_std.repeat(self._num_samples, 1) @@ -139,5 +195,11 @@ def distribution(self) -> torch.distributions.MultivariateNormal: :return: Distribution of the model :rtype: torch.distributions.MultivariateNormal + + Example:: + + >>> distribution = model.distribution() + >>> print(distribution) + MultivariateNormal(loc: torch.Size([4096, 8]), scale_tril: torch.Size([4096, 8, 8])) """ return self._distribution From 30e17b64c06290bbd5de9fe2bc0d74cbe240efdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 21 Aug 2022 23:47:12 +0200 Subject: [PATCH 039/108] Show base class properties in docs --- .../source/modules/skrl.models.base_class.rst | 30 +++++++++++++++---- .../modules/skrl.resources.preprocessors.rst | 1 - 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/docs/source/modules/skrl.models.base_class.rst b/docs/source/modules/skrl.models.base_class.rst index 50b6ec50..5ed11e5b 100644 --- a/docs/source/modules/skrl.models.base_class.rst +++ b/docs/source/modules/skrl.models.base_class.rst @@ -24,9 +24,29 @@ API ^^^ .. autoclass:: skrl.models.torch.base.Model - :undoc-members: - :show-inheritance: - :private-members: _get_space_size - :members: + :undoc-members: + :show-inheritance: + :private-members: _get_space_size, _get_instantiator_output + :members: - .. automethod:: __init__ + .. automethod:: __init__ + + .. py:property:: device + + Device to be used for the computations + + .. py:property:: observation_space + + Observation/state space. It is a replica of the class constructor parameter of the same name + + .. py:property:: action_space + + Action space. It is a replica of the class constructor parameter of the same name + + .. py:property:: num_observations + + Number of elements in the observation/state space + + .. 
py:property:: num_actions + + Number of elements in the action space diff --git a/docs/source/modules/skrl.resources.preprocessors.rst b/docs/source/modules/skrl.resources.preprocessors.rst index 5dedb188..d44aa631 100644 --- a/docs/source/modules/skrl.resources.preprocessors.rst +++ b/docs/source/modules/skrl.resources.preprocessors.rst @@ -28,7 +28,6 @@ The preprocessor class is set under the :literal:`"_preprocessor"` key cfg["value_preprocessor"] = RunningStandardScaler cfg["value_preprocessor_kwargs"] = {"size": 1, "device": device} - .. raw:: html
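The base class properties documented above (``device``, ``observation_space``, ``action_space``, ``num_observations`` and ``num_actions``) are what a custom model typically queries when sizing its layers and allocating tensors. A minimal sketch of that usage, assuming the pre-mixin ``DeterministicModel`` API as it stands at this point in the series (the ``Value`` class name and the ``role`` default are illustrative only; a later patch in the series converts these classes to mixins)::

    import torch
    import torch.nn as nn

    from skrl.models.torch import DeterministicModel

    class Value(DeterministicModel):
        def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False):
            super().__init__(observation_space, action_space, device, clip_actions)

            # num_observations / num_actions are resolved by the base class from the given spaces
            self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 32),
                                     nn.ELU(),
                                     nn.Linear(32, 1))

        # role keeps a default value so the sketch works whether or not .act() forwards it
        def compute(self, states, taken_actions, role=""):
            return self.net(torch.cat([states, taken_actions], dim=1))

Instantiation only needs the spaces and device exposed by a wrapped environment, e.g. ``Value(env.observation_space, env.action_space, env.device)``.
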
From 091d46bdde2db4184729257f0421085f73b40804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 25 Aug 2022 19:00:24 +0200 Subject: [PATCH 040/108] Skip forwarding of role parameter to models' compute method --- skrl/models/torch/base.py | 5 +---- skrl/models/torch/categorical.py | 3 +-- skrl/models/torch/deterministic.py | 3 +-- skrl/models/torch/gaussian.py | 3 +-- skrl/models/torch/multivariate_gaussian.py | 3 +-- 5 files changed, 5 insertions(+), 12 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index a53bff52..40b6b517 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -314,8 +314,7 @@ def forward(self): def compute(self, states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Union[torch.Tensor, Sequence[torch.Tensor]]: + taken_actions: Optional[torch.Tensor] = None) -> Union[torch.Tensor, Sequence[torch.Tensor]]: """Define the computation performed (to be implemented by the inheriting classes) by the models :param states: Observation/state of the environment used to make the decision @@ -323,8 +322,6 @@ def compute(self, :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. :type taken_actions: torch.Tensor, optional - :param role: Role of the model (default: ``""``) - :type role: str, optional :raises NotImplementedError: Child class must implement this method diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index ab74a029..295c4957 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -102,8 +102,7 @@ def act(self, # map from states/observations to normalized probabilities or unnormalized log probabilities if self._instantiator_net is None: output = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, - role) + taken_actions.to(self.device) if taken_actions is not None else taken_actions) else: output = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index 634ef791..0e5f5dd4 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -103,8 +103,7 @@ def act(self, # map from observations/states to actions if self._instantiator_net is None: actions = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, - role) + taken_actions.to(self.device) if taken_actions is not None else taken_actions) else: actions = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index f8cb3203..8950230e 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -136,8 +136,7 @@ def act(self, # map from states/observations to mean actions and log standard deviations if self._instantiator_net is None: actions_mean, log_std = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, - role) + taken_actions.to(self.device) if taken_actions is not None else taken_actions) else: actions_mean, log_std = self._get_instantiator_output(states.to(self.device), \ 
taken_actions.to(self.device) if taken_actions is not None else taken_actions) diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index 18d2d963..b7118b53 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -124,8 +124,7 @@ def act(self, # map from states/observations to mean actions and log standard deviations if self._instantiator_net is None: actions_mean, log_std = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, - role) + taken_actions.to(self.device) if taken_actions is not None else taken_actions) else: actions_mean, log_std = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) From 0d42800780377ff912e63aca9a2181613abd132d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 27 Aug 2022 16:22:21 +0200 Subject: [PATCH 041/108] Create logger with colored formatting --- skrl/__init__.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/skrl/__init__.py b/skrl/__init__.py index 1d0e5ee7..37fdd2aa 100644 --- a/skrl/__init__.py +++ b/skrl/__init__.py @@ -1,6 +1,31 @@ import os +import logging + +__all__ = ["__version__", "logger"] + # read library version from file path = os.path.join(os.path.dirname(__file__), "version.txt") with open(path, "r") as file: __version__ = file.read().strip() + + +# logger with format +class _Formatter(logging.Formatter): + _format = "%(name)s:%(levelname)s - %(message)s (%(module)s:%(funcName)s:%(lineno)d)" + _formats = {logging.DEBUG: f"\x1b[38;20m{_format}\x1b[0m", + logging.INFO: f"\x1b[38;20m{_format}\x1b[0m", + logging.WARNING: f"\x1b[33;20m{_format}\x1b[0m", + logging.ERROR: f"\x1b[31;20m{_format}\x1b[0m", + logging.CRITICAL: f"\x1b[31;1m{_format}\x1b[0m"} + + def format(self, record): + return logging.Formatter(self._formats.get(record.levelno)).format(record) + +_handler = logging.StreamHandler() +_handler.setLevel(logging.DEBUG) +_handler.setFormatter(_Formatter()) + +logger = logging.getLogger("skrl") +logger.setLevel(logging.DEBUG) +logger.addHandler(_handler) From 850c6a913d3e7661341e87b1c911bf8c0fb804a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 27 Aug 2022 19:10:52 +0200 Subject: [PATCH 042/108] Convert model classes to mixins --- skrl/models/torch/__init__.py | 8 +- skrl/models/torch/base.py | 9 +- skrl/models/torch/categorical.py | 67 +++++------ skrl/models/torch/deterministic.py | 45 +++----- skrl/models/torch/gaussian.py | 123 ++++++++++++--------- skrl/models/torch/multivariate_gaussian.py | 114 ++++++++++--------- 6 files changed, 188 insertions(+), 178 deletions(-) diff --git a/skrl/models/torch/__init__.py b/skrl/models/torch/__init__.py index de02edaa..ed7b6389 100644 --- a/skrl/models/torch/__init__.py +++ b/skrl/models/torch/__init__.py @@ -1,7 +1,7 @@ from .base import Model from .tabular import TabularModel -from .gaussian import GaussianModel -from .categorical import CategoricalModel -from .deterministic import DeterministicModel -from .multivariate_gaussian import MultivariateGaussianModel +from .gaussian import GaussianMixin +from .categorical import CategoricalMixin +from .deterministic import DeterministicMixin +from .multivariate_gaussian import MultivariateGaussianMixin diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 40b6b517..c51e98da 100644 --- 
a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -6,6 +6,8 @@ import torch +from skrl import logger + class Model(torch.nn.Module): def __init__(self, @@ -38,7 +40,7 @@ def __init__(self, class CustomModel(Model): def __init__(self, observation_space, action_space, device="cuda:0"): - super().__init__(observation_space, action_space, device) + Model.__init__(self, observation_space, action_space, device) self.layer_1 = nn.Linear(self.num_observations, 64) self.layer_2 = nn.Linear(64, self.num_actions) @@ -226,7 +228,7 @@ def random_act(self, :type taken_actions: torch.Tensor, optional :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the model (default: ``""``) + :param role: Role play by the model (default: ``""``) :type role: str, optional :raises NotImplementedError: Unsupported action space @@ -348,7 +350,7 @@ def act(self, :type taken_actions: torch.Tensor, optional :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the model (default: ``""``) + :param role: Role play by the model (default: ``""``) :type role: str, optional :raises NotImplementedError: Child class must implement this method @@ -358,6 +360,7 @@ def act(self, Deterministic agents must ignore the last two components and return empty tensors or None for them :rtype: sequence of torch.Tensor """ + logger.warn("Make sure to place Mixins before Model during model definition") raise NotImplementedError("The action to be taken by the agent (.act()) is not implemented") def set_mode(self, mode: str) -> None: diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index 295c4957..dc9e06ec 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -1,45 +1,32 @@ -from typing import Optional, Union, Sequence - -import gym +from typing import Optional, Sequence import torch from torch.distributions import Categorical -from . import Model - - -class CategoricalModel(Model): - def __init__(self, - observation_space: Union[int, Sequence[int], gym.Space], - action_space: Union[int, Sequence[int], gym.Space], - device: Union[str, torch.device] = "cuda:0", - unnormalized_log_prob: bool = True) -> None: - """Categorical model (stochastic model) - - :param observation_space: Observation/state space or shape. - The ``num_observations`` property will contain the size of that space - :type observation_space: int, sequence of int, gym.Space - :param action_space: Action space or shape. - The ``num_actions`` property will contain the size of that space - :type action_space: int, sequence of int, gym.Space - :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) - :type device: str or torch.device, optional + +class CategoricalMixin: + def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: + """Categorical mixin model (stochastic model) + :param unnormalized_log_prob: Flag to indicate how to be interpreted the model's output (default: ``True``). 
If True, the model's output is interpreted as unnormalized log probabilities (it can be any real number), otherwise as normalized probabilities (the output must be non-negative, finite and have a non-zero sum) :type unnormalized_log_prob: bool, optional + :param role: Role play by the model (default: ``""``) + :type role: str, optional Example:: # define the model >>> import torch >>> import torch.nn as nn - >>> from skrl.models.torch import CategoricalModel + >>> from skrl.models.torch import Model, CategoricalMixin >>> - >>> class Policy(CategoricalModel): - ... def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): - ... super().__init__(observation_space, action_space, device, unnormalized_log_prob) + >>> class Policy(CategoricalMixin, Model): + ... def __init__(self, observation_space, action_space, device="cuda:0", unnormalized_log_prob=True): + ... Model.__init__(self, observation_space, action_space, device) + ... CategoricalMixin.__init__(self, unnormalized_log_prob) ... ... self.net = nn.Sequential(nn.Linear(self.num_observations, 32), ... nn.ELU(), @@ -65,11 +52,13 @@ def __init__(self, ) ) """ - super(CategoricalModel, self).__init__(observation_space, action_space, device) - - self._unnormalized_log_prob = unnormalized_log_prob + if not hasattr(self, "_c_unnormalized_log_prob"): + self._c_unnormalized_log_prob = {} + self._c_unnormalized_log_prob[role] = unnormalized_log_prob - self._distribution = None + if not hasattr(self, "_c_distribution"): + self._c_distribution = {} + self._c_distribution[role] = None def act(self, states: torch.Tensor, @@ -85,7 +74,7 @@ def act(self, :type taken_actions: torch.Tensor, optional :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the model (default: ``""``) + :param role: Role play by the model (default: ``""``) :type role: str, optional :return: Action to be taken by the agent given the state of the environment. 
@@ -108,25 +97,27 @@ def act(self, taken_actions.to(self.device) if taken_actions is not None else taken_actions) # unnormalized log probabilities - if self._unnormalized_log_prob: - self._distribution = Categorical(logits=output) + if self._c_unnormalized_log_prob[role] if role in self._c_unnormalized_log_prob else self._c_unnormalized_log_prob[""]: + self._c_distribution[role] = Categorical(logits=output) # normalized probabilities else: - self._distribution = Categorical(probs=output) + self._c_distribution[role] = Categorical(probs=output) # actions and log of the probability density function - actions = self._distribution.sample() - log_prob = self._distribution.log_prob(actions if taken_actions is None else taken_actions.view(-1)) + actions = self._c_distribution[role].sample() + log_prob = self._c_distribution[role].log_prob(actions if taken_actions is None else taken_actions.view(-1)) if inference: return actions.unsqueeze(-1).detach(), log_prob.unsqueeze(-1).detach(), output.detach() return actions.unsqueeze(-1), log_prob.unsqueeze(-1), output - def distribution(self) -> torch.distributions.Categorical: + def distribution(self, role: str = "") -> torch.distributions.Categorical: """Get the current distribution of the model :return: Distribution of the model :rtype: torch.distributions.Categorical + :param role: Role play by the model (default: ``""``) + :type role: str, optional Example:: @@ -134,4 +125,4 @@ def distribution(self) -> torch.distributions.Categorical: >>> print(distribution) Categorical(probs: torch.Size([4096, 2]), logits: torch.Size([4096, 2])) """ - return self._distribution \ No newline at end of file + return self._c_distribution if role in self._c_distribution else self._c_distribution[""] diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index 0e5f5dd4..bde36ec4 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -1,41 +1,30 @@ -from typing import Optional, Union, Sequence +from typing import Optional, Sequence import gym import torch -from . import Model +class DeterministicMixin: + def __init__(self, clip_actions: bool = False, role: str = "") -> None: + """Deterministic mixin model (deterministic model) -class DeterministicModel(Model): - def __init__(self, - observation_space: Union[int, Sequence[int], gym.Space], - action_space: Union[int, Sequence[int], gym.Space], - device: Union[str, torch.device] = "cuda:0", - clip_actions: bool = False) -> None: - """Deterministic model (deterministic model) - - :param observation_space: Observation/state space or shape. - The ``num_observations`` property will contain the size of that space - :type observation_space: int, sequence of int, gym.Space - :param action_space: Action space or shape. - The ``num_actions`` property will contain the size of that space - :type action_space: int, sequence of int, gym.Space - :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) - :type device: str or torch.device, optional :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: ``False``) :type clip_actions: bool, optional + :param role: Role play by the model (default: ``""``) + :type role: str, optional Example:: # define the model >>> import torch >>> import torch.nn as nn - >>> from skrl.models.torch import DeterministicModel + >>> from skrl.models.torch import Model, DeterministicMixin >>> - >>> class Value(DeterministicModel): - ... 
def __init__(self, observation_space, action_space, device, clip_actions=False): - ... super().__init__(observation_space, action_space, device, clip_actions) + >>> class Value(DeterministicMixin, Model): + ... def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False): + ... Model.__init__(self, observation_space, action_space, device) + ... DeterministicMixin.__init__(self, clip_actions) ... ... self.net = nn.Sequential(nn.Linear(self.num_observations, 32), ... nn.ELU(), @@ -61,11 +50,11 @@ def __init__(self, ) ) """ - super(DeterministicModel, self).__init__(observation_space, action_space, device) - - self.clip_actions = clip_actions and issubclass(type(self.action_space), gym.Space) + if not hasattr(self, "_d_clip_actions"): + self._d_clip_actions = {} + self._d_clip_actions[role] = clip_actions and issubclass(type(self.action_space), gym.Space) - if self.clip_actions: + if self._d_clip_actions[role]: self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device) @@ -86,7 +75,7 @@ def act(self, :type taken_actions: torch.Tensor, optional :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the model (default: ``""``) + :param role: Role play by the model (default: ``""``) :type role: str, optional :return: Action to be taken by the agent given the state of the environment. @@ -109,7 +98,7 @@ def act(self, taken_actions.to(self.device) if taken_actions is not None else taken_actions) # clip actions - if self.clip_actions: + if self._d_clip_actions[role] if role in self._d_clip_actions else self._d_clip_actions[""]: if self._backward_compatibility: actions = torch.max(torch.min(actions, self.clip_actions_max), self.clip_actions_min) else: diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index 8950230e..8cad2667 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -1,33 +1,21 @@ -from typing import Optional, Union, Sequence +from typing import Optional, Sequence import gym import torch from torch.distributions import Normal -from . import Model - -class GaussianModel(Model): +class GaussianMixin: def __init__(self, - observation_space: Union[int, Sequence[int], gym.Space], - action_space: Union[int, Sequence[int], gym.Space], - device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, min_log_std: float = -20, max_log_std: float = 2, - reduction: str = "sum") -> None: - """Gaussian model (stochastic model) - - :param observation_space: Observation/state space or shape. - The ``num_observations`` property will contain the size of that space - :type observation_space: int, sequence of int, gym.Space - :param action_space: Action space or shape. 
- The ``num_actions`` property will contain the size of that space - :type action_space: int, sequence of int, gym.Space - :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) - :type device: str or torch.device, optional + reduction: str = "sum", + role: str = "") -> None: + """Gaussian mixin model (stochastic model) + :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: ``False``) :type clip_actions: bool, optional :param clip_log_std: Flag to indicate whether the log standard deviations should be clipped (default: ``True``) @@ -40,6 +28,8 @@ def __init__(self, Supported values are ``"mean"``, ``"sum"``, ``"prod"`` and ``"none"``. If "``none"``, the log probability density function is returned as a tensor of shape ``(num_samples, num_actions)`` instead of ``(num_samples, 1)`` :type reduction: str, optional + :param role: Role play by the model (default: ``""``) + :type role: str, optional :raises ValueError: If the reduction method is not valid @@ -48,13 +38,13 @@ def __init__(self, # define the model >>> import torch >>> import torch.nn as nn - >>> from skrl.models.torch import GaussianModel + >>> from skrl.models.torch import Model, GaussianMixin >>> - >>> class Policy(GaussianModel): - ... def __init__(self, observation_space, action_space, device, clip_actions=False, - ... clip_log_std=True, min_log_std=-20, max_log_std=2): - ... super().__init__(observation_space, action_space, device, clip_actions, - ... clip_log_std, min_log_std, max_log_std) + >>> class Policy(GaussianMixin, Model): + ... def __init__(self, observation_space, action_space, device="cuda:0", + ... clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): + ... Model.__init__(self, observation_space, action_space, device) + ... GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) ... ... self.net = nn.Sequential(nn.Linear(self.num_observations, 32), ... 
nn.ELU(), @@ -81,28 +71,42 @@ def __init__(self, ) ) """ - super(GaussianModel, self).__init__(observation_space, action_space, device) - - self.clip_actions = clip_actions and issubclass(type(self.action_space), gym.Space) + if not hasattr(self, "_g_clip_actions"): + self._g_clip_actions = {} + self._g_clip_actions[role] = clip_actions and issubclass(type(self.action_space), gym.Space) - if self.clip_actions: + if self._g_clip_actions[role]: self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device) # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) - self.clip_log_std = clip_log_std - self.log_std_min = min_log_std - self.log_std_max = max_log_std - - self._log_std = None - self._num_samples = None - self._distribution = None + if not hasattr(self, "_g_clip_log_std"): + self._g_clip_log_std = {} + self._g_clip_log_std[role] = clip_log_std + if not hasattr(self, "_g_log_std_min"): + self._g_log_std_min = {} + self._g_log_std_min[role] = min_log_std + if not hasattr(self, "_g_log_std_max"): + self._g_log_std_max = {} + self._g_log_std_max[role] = max_log_std + + if not hasattr(self, "_g_log_std"): + self._g_log_std = {} + self._g_log_std[role] = None + if not hasattr(self, "_g_num_samples"): + self._g_num_samples = {} + self._g_num_samples[role] = None + if not hasattr(self, "_g_distribution"): + self._g_distribution = {} + self._g_distribution[role] = None if reduction not in ["mean", "sum", "prod", "none"]: raise ValueError("reduction must be one of 'mean', 'sum', 'prod' or 'none'") - self._reduction = torch.mean if reduction == "mean" else torch.sum if reduction == "sum" \ + if not hasattr(self, "_g_reduction"): + self._g_reduction = {} + self._g_reduction[role] = torch.mean if reduction == "mean" else torch.sum if reduction == "sum" \ else torch.prod if reduction == "prod" else None def act(self, @@ -119,7 +123,7 @@ def act(self, :type taken_actions: torch.Tensor, optional :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the model (default: ``""``) + :param role: Role play by the model (default: ``""``) :type role: str, optional :return: Action to be taken by the agent given the state of the environment. 
@@ -142,29 +146,32 @@ def act(self, taken_actions.to(self.device) if taken_actions is not None else taken_actions) # clamp log standard deviations - if self.clip_log_std: - log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max) + if self._g_clip_log_std[role] if role in self._g_clip_log_std else self._g_clip_log_std[""]: + log_std = torch.clamp(log_std, + self._g_log_std_min[role] if role in self._g_log_std_min else self._g_log_std_min[""], + self._g_log_std_max[role] if role in self._g_log_std_max else self._g_log_std_max[""]) - self._log_std = log_std - self._num_samples = actions_mean.shape[0] + self._g_log_std[role] = log_std + self._g_num_samples[role] = actions_mean.shape[0] # distribution - self._distribution = Normal(actions_mean, log_std.exp()) + self._g_distribution[role] = Normal(actions_mean, log_std.exp()) # sample using the reparameterization trick - actions = self._distribution.rsample() + actions = self._g_distribution[role].rsample() # clip actions - if self.clip_actions: + if self._g_clip_actions[role] if role in self._g_clip_actions else self._g_clip_actions[""]: if self._backward_compatibility: actions = torch.max(torch.min(actions, self.clip_actions_max), self.clip_actions_min) else: actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) # log of the probability density function - log_prob = self._distribution.log_prob(actions if taken_actions is None else taken_actions) - if self._reduction is not None: - log_prob = self._reduction(log_prob, dim=-1) + log_prob = self._g_distribution[role].log_prob(actions if taken_actions is None else taken_actions) + reduction = self._g_reduction[role] if role in self._g_reduction else self._g_reduction[""] + if reduction is not None: + log_prob = reduction(log_prob, dim=-1) if log_prob.dim() != actions.dim(): log_prob = log_prob.unsqueeze(-1) @@ -172,11 +179,13 @@ def act(self, return actions.detach(), log_prob.detach(), actions_mean.detach() return actions, log_prob, actions_mean - def get_entropy(self) -> torch.Tensor: + def get_entropy(self, role: str = "") -> torch.Tensor: """Compute and return the entropy of the model :return: Entropy of the model :rtype: torch.Tensor + :param role: Role play by the model (default: ``""``) + :type role: str, optional Example:: @@ -184,15 +193,18 @@ def get_entropy(self) -> torch.Tensor: >>> print(entropy.shape) torch.Size([4096, 8]) """ - if self._distribution is None: + distribution = self._g_distribution[role] if role in self._g_distribution else self._g_distribution[""] + if distribution is None: return torch.tensor(0.0, device=self.device) - return self._distribution.entropy().to(self.device) + return distribution.entropy().to(self.device) - def get_log_std(self) -> torch.Tensor: + def get_log_std(self, role: str = "") -> torch.Tensor: """Return the log standard deviation of the model :return: Log standard deviation of the model :rtype: torch.Tensor + :param role: Role play by the model (default: ``""``) + :type role: str, optional Example:: @@ -200,13 +212,16 @@ def get_log_std(self) -> torch.Tensor: >>> print(log_std.shape) torch.Size([4096, 8]) """ - return self._log_std.repeat(self._num_samples, 1) + return (self._g_log_std[role] if role in self._g_log_std else self._g_log_std[""]) \ + .repeat(self._g_num_samples[role] if role in self._g_num_samples else self._g_num_samples[""], 1) - def distribution(self) -> torch.distributions.Normal: + def distribution(self, role: str = "") -> torch.distributions.Normal: """Get the current distribution of the 
model :return: Distribution of the model :rtype: torch.distributions.Normal + :param role: Role play by the model (default: ``""``) + :type role: str, optional Example:: @@ -214,4 +229,4 @@ def distribution(self) -> torch.distributions.Normal: >>> print(distribution) Normal(loc: torch.Size([4096, 8]), scale: torch.Size([4096, 8])) """ - return self._distribution + return self._g_distribution[role] if role in self._g_distribution else self._g_distribution[""] diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index b7118b53..684b41ae 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -1,32 +1,20 @@ -from typing import Optional, Union, Sequence +from typing import Optional, Sequence import gym import torch from torch.distributions import MultivariateNormal -from . import Model - -class MultivariateGaussianModel(Model): +class MultivariateGaussianMixin: def __init__(self, - observation_space: Union[int, Sequence[int], gym.Space], - action_space: Union[int, Sequence[int], gym.Space], - device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, min_log_std: float = -20, - max_log_std: float = 2) -> None: - """Multivariate Gaussian model (stochastic model) - - :param observation_space: Observation/state space or shape. - The ``num_observations`` property will contain the size of that space - :type observation_space: int, sequence of int, gym.Space - :param action_space: Action space or shape. - The ``num_actions`` property will contain the size of that space - :type action_space: int, sequence of int, gym.Space - :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) - :type device: str or torch.device, optional + max_log_std: float = 2, + role: str = "") -> None: + """Multivariate Gaussian mixin model (stochastic model) + :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: ``False``) :type clip_actions: bool, optional :param clip_log_std: Flag to indicate whether the log standard deviations should be clipped (default: ``True``) @@ -35,19 +23,21 @@ def __init__(self, :type min_log_std: float, optional :param max_log_std: Maximum value of the log standard deviation if ``clip_log_std`` is True (default: ``2``) :type max_log_std: float, optional + :param role: Role play by the model (default: ``""``) + :type role: str, optional Example:: # define the model >>> import torch >>> import torch.nn as nn - >>> from skrl.models.torch import MultivariateGaussianModel + >>> from skrl.models.torch import Model, MultivariateGaussianMixin >>> - >>> class Policy(MultivariateGaussianModel): - ... def __init__(self, observation_space, action_space, device, clip_actions=False, + >>> class Policy(MultivariateGaussianMixin, Model): + ... def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False, ... clip_log_std=True, min_log_std=-20, max_log_std=2): - ... super().__init__(observation_space, action_space, device, clip_actions, - ... clip_log_std, min_log_std, max_log_std) + ... Model.__init__(self, observation_space, action_space, device) + ... MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) ... ... self.net = nn.Sequential(nn.Linear(self.num_observations, 32), ... 
nn.ELU(), @@ -74,24 +64,36 @@ def __init__(self, ) ) """ - super(MultivariateGaussianModel, self).__init__(observation_space, action_space, device) - - self.clip_actions = clip_actions and issubclass(type(self.action_space), gym.Space) + if not hasattr(self, "_mg_clip_actions"): + self._mg_clip_actions = {} + self._mg_clip_actions[role] = clip_actions and issubclass(type(self.action_space), gym.Space) - if self.clip_actions: + if self._mg_clip_actions[role]: self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device) # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) - self.clip_log_std = clip_log_std - self.log_std_min = min_log_std - self.log_std_max = max_log_std - - self._log_std = None - self._num_samples = None - self._distribution = None + if not hasattr(self, "_mg_clip_log_std"): + self._mg_clip_log_std = {} + self._mg_clip_log_std[role] = clip_log_std + if not hasattr(self, "_mg_log_std_min"): + self._mg_log_std_min = {} + self._mg_log_std_min[role] = min_log_std + if not hasattr(self, "_mg_log_std_max"): + self._mg_log_std_max = {} + self._mg_log_std_max[role] = max_log_std + + if not hasattr(self, "_mg_log_std"): + self._mg_log_std = {} + self._mg_log_std[role] = None + if not hasattr(self, "_mg_num_samples"): + self._mg_num_samples = {} + self._mg_num_samples[role] = None + if not hasattr(self, "_mg_distribution"): + self._mg_distribution = {} + self._mg_distribution[role] = None def act(self, states: torch.Tensor, @@ -107,7 +109,7 @@ def act(self, :type taken_actions: torch.Tensor, optional :param inference: Flag to indicate whether the model is making inference (default: ``False``) :type inference: bool, optional - :param role: Role of the model (default: ``""``) + :param role: Role play by the model (default: ``""``) :type role: str, optional :return: Action to be taken by the agent given the state of the environment. 
@@ -130,28 +132,30 @@ def act(self, taken_actions.to(self.device) if taken_actions is not None else taken_actions) # clamp log standard deviations - if self.clip_log_std: - log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max) + if self._mg_clip_log_std[role] if role in self._mg_clip_log_std else self._mg_clip_log_std[""]: + log_std = torch.clamp(log_std, + self._mg_log_std_min[role] if role in self._mg_log_std_min else self._mg_log_std_min[""], + self._mg_log_std_max[role] if role in self._mg_log_std_max else self._mg_log_std_max[""]) - self._log_std = log_std - self._num_samples = actions_mean.shape[0] + self._mg_log_std[role] = log_std + self._mg_num_samples[role] = actions_mean.shape[0] # distribution covariance = torch.diag(log_std.exp() * log_std.exp()) - self._distribution = MultivariateNormal(actions_mean, scale_tril=covariance) + self._mg_distribution[role] = MultivariateNormal(actions_mean, scale_tril=covariance) # sample using the reparameterization trick - actions = self._distribution.rsample() + actions = self._mg_distribution[role].rsample() # clip actions - if self.clip_actions: + if self._mg_clip_actions[role] if role in self._mg_clip_actions else self._mg_clip_actions[""]: if self._backward_compatibility: actions = torch.max(torch.min(actions, self.clip_actions_max), self.clip_actions_min) else: actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) # log of the probability density function - log_prob = self._distribution.log_prob(actions if taken_actions is None else taken_actions) + log_prob = self._mg_distribution[role].log_prob(actions if taken_actions is None else taken_actions) if log_prob.dim() != actions.dim(): log_prob = log_prob.unsqueeze(-1) @@ -159,11 +163,13 @@ def act(self, return actions.detach(), log_prob.detach(), actions_mean.detach() return actions, log_prob, actions_mean - def get_entropy(self) -> torch.Tensor: + def get_entropy(self, role: str = "") -> torch.Tensor: """Compute and return the entropy of the model :return: Entropy of the model :rtype: torch.Tensor + :param role: Role play by the model (default: ``""``) + :type role: str, optional Example:: @@ -171,15 +177,18 @@ def get_entropy(self) -> torch.Tensor: >>> print(entropy.shape) torch.Size([4096]) """ - if self._distribution is None: + distribution = self._mg_distribution[role] if role in self._mg_distribution else self._mg_distribution[""] + if distribution is None: return torch.tensor(0.0, device=self.device) - return self._distribution.entropy().to(self.device) + return distribution.entropy().to(self.device) - def get_log_std(self) -> torch.Tensor: + def get_log_std(self, role: str = "") -> torch.Tensor: """Return the log standard deviation of the model :return: Log standard deviation of the model :rtype: torch.Tensor + :param role: Role play by the model (default: ``""``) + :type role: str, optional Example:: @@ -187,13 +196,16 @@ def get_log_std(self) -> torch.Tensor: >>> print(log_std.shape) torch.Size([4096, 8]) """ - return self._log_std.repeat(self._num_samples, 1) - - def distribution(self) -> torch.distributions.MultivariateNormal: + return (self._mg_log_std[role] if role in self._mg_log_std else self._mg_log_std[""]) \ + .repeat(self._mg_num_samples[role] if role in self._mg_num_samples else self._mg_num_samples[""], 1) + + def distribution(self, role: str = "") -> torch.distributions.MultivariateNormal: """Get the current distribution of the model :return: Distribution of the model :rtype: torch.distributions.MultivariateNormal + :param 
role: Role play by the model (default: ``""``) + :type role: str, optional Example:: @@ -201,4 +213,4 @@ def distribution(self) -> torch.distributions.MultivariateNormal: >>> print(distribution) MultivariateNormal(loc: torch.Size([4096, 8]), scale_tril: torch.Size([4096, 8, 8])) """ - return self._distribution + return self._mg_distribution[role] if role in self._mg_distribution else self._mg_distribution[""] From bdebddb69884ff47e9bf5c5cc7a9dbcad1b4e14f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 27 Aug 2022 19:41:19 +0200 Subject: [PATCH 043/108] Update models docs to reflect class changes --- .../modules/skrl.models.categorical.rst | 25 ++++----- .../modules/skrl.models.deterministic.rst | 25 ++++----- docs/source/modules/skrl.models.gaussian.rst | 25 ++++----- .../skrl.models.multivariate_gaussian.rst | 19 +++---- docs/source/snippets/categorical_model.py | 46 ++++----------- docs/source/snippets/deterministic_model.py | 46 ++++----------- docs/source/snippets/gaussian_model.py | 56 +++++-------------- .../snippets/multivariate_gaussian_model.py | 47 +++++----------- 8 files changed, 93 insertions(+), 196 deletions(-) diff --git a/docs/source/modules/skrl.models.categorical.rst b/docs/source/modules/skrl.models.categorical.rst index 8e9c540a..66c075c9 100644 --- a/docs/source/modules/skrl.models.categorical.rst +++ b/docs/source/modules/skrl.models.categorical.rst @@ -4,15 +4,15 @@ Categorical model ================= Concept -^^^^^^^ +------- .. image:: ../_static/imgs/model_categorical.svg - :width: 100% - :align: center - :alt: Categorical model + :width: 100% + :align: center + :alt: Categorical model Basic usage -^^^^^^^^^^^ +----------- .. tabs:: @@ -33,11 +33,10 @@ Basic usage :end-before: [end-cnn] API -^^^ - -.. autoclass:: skrl.models.torch.categorical.CategoricalModel - :show-inheritance: - :members: - - .. automethod:: __init__ - .. automethod:: compute +--- + +.. autoclass:: skrl.models.torch.categorical.CategoricalMixin + :show-inheritance: + :members: + + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.models.deterministic.rst b/docs/source/modules/skrl.models.deterministic.rst index 733800a1..04266a88 100644 --- a/docs/source/modules/skrl.models.deterministic.rst +++ b/docs/source/modules/skrl.models.deterministic.rst @@ -4,15 +4,15 @@ Deterministic model =================== Concept -^^^^^^^ +------- .. image:: ../_static/imgs/model_deterministic.svg - :width: 65% - :align: center - :alt: Deterministic model + :width: 65% + :align: center + :alt: Deterministic model Basic usage -^^^^^^^^^^^ +----------- .. tabs:: @@ -33,11 +33,10 @@ Basic usage :end-before: [end-cnn] API -^^^ - -.. autoclass:: skrl.models.torch.deterministic.DeterministicModel - :show-inheritance: - :members: - - .. automethod:: __init__ - .. automethod:: compute +--- + +.. autoclass:: skrl.models.torch.deterministic.DeterministicMixin + :show-inheritance: + :members: + + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.models.gaussian.rst b/docs/source/modules/skrl.models.gaussian.rst index 9739bf25..7a8b788b 100644 --- a/docs/source/modules/skrl.models.gaussian.rst +++ b/docs/source/modules/skrl.models.gaussian.rst @@ -4,15 +4,15 @@ Gaussian model ============== Concept -^^^^^^^ +------- .. image:: ../_static/imgs/model_gaussian.svg - :width: 100% - :align: center - :alt: Gaussian model + :width: 100% + :align: center + :alt: Gaussian model Basic usage -^^^^^^^^^^^ +----------- .. 
tabs:: @@ -33,11 +33,10 @@ Basic usage :end-before: [end-cnn] API -^^^ - -.. autoclass:: skrl.models.torch.gaussian.GaussianModel - :show-inheritance: - :members: - - .. automethod:: __init__ - .. automethod:: compute +--- + +.. autoclass:: skrl.models.torch.gaussian.GaussianMixin + :show-inheritance: + :members: + + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.models.multivariate_gaussian.rst b/docs/source/modules/skrl.models.multivariate_gaussian.rst index 3ad21398..c4a20066 100644 --- a/docs/source/modules/skrl.models.multivariate_gaussian.rst +++ b/docs/source/modules/skrl.models.multivariate_gaussian.rst @@ -4,7 +4,7 @@ Multivariate Gaussian model =========================== Concept -^^^^^^^ +------- .. image:: ../_static/imgs/model_multivariate_gaussian.svg :width: 100% @@ -12,7 +12,7 @@ Concept :alt: Multivariate Gaussian model Basic usage -^^^^^^^^^^^ +----------- .. tabs:: @@ -33,11 +33,10 @@ Basic usage :end-before: [end-cnn] API -^^^ - -.. autoclass:: skrl.models.torch.multivariate_gaussian.MultivariateGaussianModel - :show-inheritance: - :members: - - .. automethod:: __init__ - .. automethod:: compute +--- + +.. autoclass:: skrl.models.torch.multivariate_gaussian.MultivariateGaussianMixin + :show-inheritance: + :members: + + .. automethod:: __init__ diff --git a/docs/source/snippets/categorical_model.py b/docs/source/snippets/categorical_model.py index 799bd6c6..66fcc47b 100644 --- a/docs/source/snippets/categorical_model.py +++ b/docs/source/snippets/categorical_model.py @@ -1,29 +1,21 @@ -import gym - -class DummyEnv: - observation_space = gym.spaces.Box(low=-1, high=1, shape=(4,)) - action_space = gym.spaces.Discrete(2) - device = "cuda:0" - -env = DummyEnv() - # [start-mlp] import torch.nn as nn import torch.nn.functional as F -from skrl.models.torch import CategoricalModel +from skrl.models.torch import Model, CategoricalMixin # define the model -class MLP(CategoricalModel): +class MLP(CategoricalMixin, Model): def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): - super().__init__(observation_space, action_space, device, unnormalized_log_prob) + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) self.linear_layer_1 = nn.Linear(self.num_observations, 64) self.linear_layer_2 = nn.Linear(64, 32) self.output_layer = nn.Linear(32, self.num_actions) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): x = F.relu(self.linear_layer_1(states)) x = F.relu(self.linear_layer_2(x)) return self.output_layer(x) @@ -36,32 +28,19 @@ def compute(self, states, taken_actions): unnormalized_log_prob=True) # [end-mlp] -import torch -policy.to(env.device) -actions = policy.act(torch.randn(10, 4, device=env.device)) -assert actions[0].shape == torch.Size([10, 1]) - # ============================================================================= -import gym - -class DummyEnv: - observation_space = gym.spaces.Box(low=0, high=255, shape=(128, 128, 3)) - action_space = gym.spaces.Discrete(3) - device = "cuda:0" - -env = DummyEnv() - # [start-cnn] import torch.nn as nn -from skrl.models.torch import CategoricalModel +from skrl.models.torch import Model, CategoricalMixin # define the model -class CNN(CategoricalModel): +class CNN(CategoricalMixin, Model): def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): - super().__init__(observation_space, action_space, device, unnormalized_log_prob) + 
Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) self.net = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ReLU(), @@ -80,7 +59,7 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro nn.Tanh(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): # permute (samples, width, height, channels) -> (samples, channels, width, height) return self.net(states.permute(0, 3, 1, 2)) @@ -91,8 +70,3 @@ def compute(self, states, taken_actions): device=env.device, unnormalized_log_prob=True) # [end-cnn] - -import torch -policy.to(env.device) -actions = policy.act(torch.randn(10, 128, 128, 3, device=env.device)) -assert actions[0].shape == torch.Size([10, 1]) diff --git a/docs/source/snippets/deterministic_model.py b/docs/source/snippets/deterministic_model.py index 797c4045..d735ef63 100644 --- a/docs/source/snippets/deterministic_model.py +++ b/docs/source/snippets/deterministic_model.py @@ -1,23 +1,15 @@ -import gym - -class DummyEnv: - observation_space = gym.spaces.Box(low=-1, high=1, shape=(4,)) - action_space = gym.spaces.Box(low=-1, high=1, shape=(3,)) - device = "cuda:0" - -env = DummyEnv() - # [start-mlp] import torch import torch.nn as nn -from skrl.models.torch import DeterministicModel +from skrl.models.torch import Model, DeterministicMixin # define the model -class MLP(DeterministicModel): +class MLP(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 64), nn.ReLU(), @@ -25,7 +17,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ReLU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(torch.cat([states, taken_actions], dim=1)) @@ -36,33 +28,20 @@ def compute(self, states, taken_actions): clip_actions=False) # [end-mlp] -import torch -policy.to(env.device) -actions = policy.act(torch.randn(10, 4, device=env.device), torch.randn(10, 3, device=env.device)) -assert actions[0].shape == torch.Size([10, 1]) - # ============================================================================= -import gym - -class DummyEnv: - observation_space = gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)) - action_space = gym.spaces.Box(low=-1, high=1, shape=(3,)) - device = "cuda:0" - -env = DummyEnv() - # [start-cnn] import torch import torch.nn as nn -from skrl.models.torch import DeterministicModel +from skrl.models.torch import Model, DeterministicMixin # define the model -class CNN(DeterministicModel): +class CNN(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.features_extractor = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=3), nn.ReLU(), @@ -81,7 +60,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.Tanh(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, 
taken_actions, role): # permute (samples, width, height, channels) -> (samples, channels, width, height) x = self.features_extractor(states.permute(0, 3, 1, 2)) return self.net(torch.cat([x, taken_actions], dim=1)) @@ -93,8 +72,3 @@ def compute(self, states, taken_actions): device=env.device, clip_actions=False) # [end-cnn] - -import torch -policy.to(env.device) -actions = policy.act(torch.randn(10, 64, 64, 3, device=env.device), torch.randn(10, 3, device=env.device)) -assert actions[0].shape == torch.Size([10, 1]) diff --git a/docs/source/snippets/gaussian_model.py b/docs/source/snippets/gaussian_model.py index 0b1d5f85..50ef6afe 100644 --- a/docs/source/snippets/gaussian_model.py +++ b/docs/source/snippets/gaussian_model.py @@ -1,26 +1,17 @@ -import gym - -class DummyEnv: - observation_space = gym.spaces.Box(low=-1, high=1, shape=(5,)) - action_space = gym.spaces.Box(low=-1, high=1, shape=(3,)) - device = "cuda:0" - -env = DummyEnv() - # [start-mlp] import torch import torch.nn as nn import torch.nn.functional as F -from skrl.models.torch import GaussianModel +from skrl.models.torch import Model, GaussianMixin # define the model -class MLP(GaussianModel): - def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std, reduction) +class MLP(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) self.linear_layer_1 = nn.Linear(self.num_observations, 128) self.linear_layer_2 = nn.Linear(128, 64) @@ -29,7 +20,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): x = F.relu(self.linear_layer_1(states)) x = F.relu(self.linear_layer_2(x)) x = F.relu(self.linear_layer_3(x)) @@ -46,36 +37,22 @@ def compute(self, states, taken_actions): reduction="sum") # [end-mlp] -import torch -policy.to(env.device) -actions = policy.act(torch.randn(10, 5, device=env.device), torch.randn(10, 3, device=env.device)) -assert actions[0].shape == torch.Size([10, env.action_space.shape[0]]) - # ============================================================================= -import gym - -class DummyEnv: - observation_space = gym.spaces.Box(low=0, high=255, shape=(256, 256, 1)) - action_space = gym.spaces.Box(low=-1, high=1, shape=(2,)) - device = "cuda:0" - -env = DummyEnv() - # [start-cnn] import torch import torch.nn as nn import torch.nn.functional as F -from skrl.models.torch import GaussianModel +from skrl.models.torch import Model, GaussianMixin # define the model -class CNN(GaussianModel): - def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std, reduction) +class CNN(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, 
reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) self.net = nn.Sequential(nn.Conv2d(1, 64, kernel_size=4, stride=2), nn.ReLU(), @@ -96,7 +73,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): # permute (samples, width, height, channels) -> (samples, channels, width, height) return self.net(states.permute(0, 3, 1, 2)), self.log_std_parameter @@ -111,8 +88,3 @@ def compute(self, states, taken_actions): max_log_std=2, reduction="sum") # [end-cnn] - -import torch -policy.to(env.device) -actions = policy.act(torch.randn(10, 256, 256, 1, device=env.device), torch.randn(10, 2, device=env.device)) -assert actions[0].shape == torch.Size([10, env.action_space.shape[0]]) diff --git a/docs/source/snippets/multivariate_gaussian_model.py b/docs/source/snippets/multivariate_gaussian_model.py index 69fbad38..d384271c 100644 --- a/docs/source/snippets/multivariate_gaussian_model.py +++ b/docs/source/snippets/multivariate_gaussian_model.py @@ -12,15 +12,15 @@ class DummyEnv: import torch.nn as nn import torch.nn.functional as F -from skrl.models.torch import MultivariateGaussianModel +from skrl.models.torch import Model, MultivariateGaussianMixin # define the model -class MLP(MultivariateGaussianModel): - def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) +class MLP(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.linear_layer_1 = nn.Linear(self.num_observations, 128) self.linear_layer_2 = nn.Linear(128, 64) @@ -29,7 +29,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): x = F.relu(self.linear_layer_1(states)) x = F.relu(self.linear_layer_2(x)) x = F.relu(self.linear_layer_3(x)) @@ -45,36 +45,22 @@ def compute(self, states, taken_actions): max_log_std=2) # [end-mlp] -import torch -policy.to(env.device) -actions = policy.act(torch.randn(10, 5, device=env.device), torch.randn(10, 3, device=env.device)) -assert actions[0].shape == torch.Size([10, env.action_space.shape[0]]) - # ============================================================================= -import gym - -class DummyEnv: - observation_space = gym.spaces.Box(low=0, high=255, shape=(256, 256, 1)) - action_space = gym.spaces.Box(low=-1, high=1, shape=(2,)) - device = "cuda:0" - -env = DummyEnv() - # [start-cnn] import torch import torch.nn as nn import torch.nn.functional as F -from skrl.models.torch import MultivariateGaussianModel +from skrl.models.torch import Model, MultivariateGaussianMixin # define the model -class CNN(MultivariateGaussianModel): - def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, 
min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) +class CNN(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Conv2d(1, 64, kernel_size=4, stride=2), nn.ReLU(), @@ -95,7 +81,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): # permute (samples, width, height, channels) -> (samples, channels, width, height) return self.net(states.permute(0, 3, 1, 2)), self.log_std_parameter @@ -109,8 +95,3 @@ def compute(self, states, taken_actions): min_log_std=-20, max_log_std=2) # [end-cnn] - -import torch -policy.to(env.device) -actions = policy.act(torch.randn(10, 256, 256, 1, device=env.device), torch.randn(10, 2, device=env.device)) -assert actions[0].shape == torch.Size([10, env.action_space.shape[0]]) From ffe3850a4a750eb2cc938c602e7ad074f18f215b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 28 Aug 2022 00:02:23 +0200 Subject: [PATCH 044/108] Fix model weights initialization for sequential containers --- skrl/models/torch/base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index c51e98da..1d4b915a 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -301,7 +301,7 @@ def init_weights(self, method_name: str = "orthogonal_", *args, **kwargs) -> Non def _update_weights(module, method_name, args, kwargs): for layer in module: if isinstance(layer, torch.nn.Sequential): - _update_weights(layer) + _update_weights(layer, method_name, args, kwargs) elif isinstance(layer, torch.nn.Linear): exec("torch.nn.init.{}(layer.weight, *args, **kwargs)".format(method_name)) @@ -316,7 +316,8 @@ def forward(self): def compute(self, states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None) -> Union[torch.Tensor, Sequence[torch.Tensor]]: + taken_actions: Optional[torch.Tensor] = None, + role: str = "") -> Union[torch.Tensor, Sequence[torch.Tensor]]: """Define the computation performed (to be implemented by the inheriting classes) by the models :param states: Observation/state of the environment used to make the decision @@ -324,6 +325,8 @@ def compute(self, :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. 
:type taken_actions: torch.Tensor, optional + :param role: Role play by the model (default: ``""``) + :type role: str, optional :raises NotImplementedError: Child class must implement this method From e23d25f3fc714fd8fb67d24bc4e3fd8a4fdc6031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 28 Aug 2022 00:10:55 +0200 Subject: [PATCH 045/108] Forward role argument to compute method --- skrl/models/torch/categorical.py | 2 +- skrl/models/torch/deterministic.py | 2 +- skrl/models/torch/gaussian.py | 3 ++- skrl/models/torch/multivariate_gaussian.py | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index dc9e06ec..fba74763 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -91,7 +91,7 @@ def act(self, # map from states/observations to normalized probabilities or unnormalized log probabilities if self._instantiator_net is None: output = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) else: output = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index bde36ec4..8551d00a 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -92,7 +92,7 @@ def act(self, # map from observations/states to actions if self._instantiator_net is None: actions = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) else: actions = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index 8cad2667..b80895aa 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -140,7 +140,8 @@ def act(self, # map from states/observations to mean actions and log standard deviations if self._instantiator_net is None: actions_mean, log_std = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + taken_actions.to(self.device) if taken_actions is not None else taken_actions, + role) else: actions_mean, log_std = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index 684b41ae..b408923c 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -126,7 +126,8 @@ def act(self, # map from states/observations to mean actions and log standard deviations if self._instantiator_net is None: actions_mean, log_std = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + taken_actions.to(self.device) if taken_actions is not None else taken_actions, + role) else: actions_mean, log_std = self._get_instantiator_output(states.to(self.device), \ taken_actions.to(self.device) if taken_actions is not None else taken_actions) From 
a0868fdd760aa4a15759fd8b0222c4dc9fc4de1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 5 Sep 2022 17:47:21 +0200 Subject: [PATCH 046/108] Remove models' inference parameter --- skrl/models/torch/base.py | 8 +------- skrl/models/torch/categorical.py | 5 ----- skrl/models/torch/deterministic.py | 5 ----- skrl/models/torch/gaussian.py | 5 ----- skrl/models/torch/multivariate_gaussian.py | 5 ----- 5 files changed, 1 insertion(+), 27 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 1d4b915a..93b98074 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -45,7 +45,7 @@ def __init__(self, observation_space, action_space, device="cuda:0"): self.layer_1 = nn.Linear(self.num_observations, 64) self.layer_2 = nn.Linear(64, self.num_actions) - def act(self, states, taken_actions=None, inference=False, role=""): + def act(self, states, taken_actions=None, role=""): x = F.relu(self.layer_1(states)) x = F.relu(self.layer_2(x)) return x @@ -217,7 +217,6 @@ def tensor_to_space(self, def random_act(self, states: torch.Tensor, taken_actions: Optional[torch.Tensor] = None, - inference: bool = False, role: str = "") -> Sequence[torch.Tensor]: """Act randomly according to the action space @@ -226,8 +225,6 @@ def random_act(self, :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. :type taken_actions: torch.Tensor, optional - :param inference: Flag to indicate whether the model is making inference (default: ``False``) - :type inference: bool, optional :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -338,7 +335,6 @@ def compute(self, def act(self, states: torch.Tensor, taken_actions: Optional[torch.Tensor] = None, - inference: bool = False, role: str = "") -> Sequence[torch.Tensor]: """Act according to the specified behavior (to be implemented by the inheriting classes) @@ -351,8 +347,6 @@ def act(self, :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. :type taken_actions: torch.Tensor, optional - :param inference: Flag to indicate whether the model is making inference (default: ``False``) - :type inference: bool, optional :param role: Role play by the model (default: ``""``) :type role: str, optional diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index fba74763..5c779510 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -63,7 +63,6 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: def act(self, states: torch.Tensor, taken_actions: Optional[torch.Tensor] = None, - inference: bool = False, role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment @@ -72,8 +71,6 @@ def act(self, :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. 
:type taken_actions: torch.Tensor, optional - :param inference: Flag to indicate whether the model is making inference (default: ``False``) - :type inference: bool, optional :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -107,8 +104,6 @@ def act(self, actions = self._c_distribution[role].sample() log_prob = self._c_distribution[role].log_prob(actions if taken_actions is None else taken_actions.view(-1)) - if inference: - return actions.unsqueeze(-1).detach(), log_prob.unsqueeze(-1).detach(), output.detach() return actions.unsqueeze(-1), log_prob.unsqueeze(-1), output def distribution(self, role: str = "") -> torch.distributions.Categorical: diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index 8551d00a..bc863320 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -64,7 +64,6 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: def act(self, states: torch.Tensor, taken_actions: Optional[torch.Tensor] = None, - inference: bool = False, role: str = "") -> Sequence[torch.Tensor]: """Act deterministically in response to the state of the environment @@ -73,8 +72,6 @@ def act(self, :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. :type taken_actions: torch.Tensor, optional - :param inference: Flag to indicate whether the model is making inference (default: ``False``) - :type inference: bool, optional :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -104,7 +101,5 @@ def act(self, else: actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) - if inference: - return actions.detach(), None, None return actions, None, None \ No newline at end of file diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index b80895aa..c06fe570 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -112,7 +112,6 @@ def __init__(self, def act(self, states: torch.Tensor, taken_actions: Optional[torch.Tensor] = None, - inference: bool = False, role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment @@ -121,8 +120,6 @@ def act(self, :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. 
:type taken_actions: torch.Tensor, optional - :param inference: Flag to indicate whether the model is making inference (default: ``False``) - :type inference: bool, optional :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -176,8 +173,6 @@ def act(self, if log_prob.dim() != actions.dim(): log_prob = log_prob.unsqueeze(-1) - if inference: - return actions.detach(), log_prob.detach(), actions_mean.detach() return actions, log_prob, actions_mean def get_entropy(self, role: str = "") -> torch.Tensor: diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index b408923c..cc65b7c9 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -98,7 +98,6 @@ def __init__(self, def act(self, states: torch.Tensor, taken_actions: Optional[torch.Tensor] = None, - inference: bool = False, role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment @@ -107,8 +106,6 @@ def act(self, :param taken_actions: Actions taken by a policy to the given states (default: ``None``). The use of these actions only makes sense in critical models, e.g. :type taken_actions: torch.Tensor, optional - :param inference: Flag to indicate whether the model is making inference (default: ``False``) - :type inference: bool, optional :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -160,8 +157,6 @@ def act(self, if log_prob.dim() != actions.dim(): log_prob = log_prob.unsqueeze(-1) - if inference: - return actions.detach(), log_prob.detach(), actions_mean.detach() return actions, log_prob, actions_mean def get_entropy(self, role: str = "") -> torch.Tensor: From 14e87646cf55425574210da097172e2bb52fd9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 5 Sep 2022 17:54:40 +0200 Subject: [PATCH 047/108] Update A2C implementation --- skrl/agents/torch/a2c/a2c.py | 69 ++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 8c779bdf..b51d2479 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -2,6 +2,7 @@ import gym import copy +import itertools import torch import torch.nn as nn @@ -20,8 +21,7 @@ "discount_factor": 0.99, # discount factor (gamma) "lambda": 0.95, # TD(lambda) coefficient (lam) for computing returns and advantages - "policy_learning_rate": 1e-3, # policy learning rate - "value_learning_rate": 1e-3, # value learning rate + "learning_rate": 1e-3, # learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. 
{"step_size": 1e-3}) @@ -104,8 +104,7 @@ def __init__(self, self._entropy_loss_scale = self.cfg["entropy_loss_scale"] - self._policy_learning_rate = self.cfg["policy_learning_rate"] - self._value_learning_rate = self.cfg["value_learning_rate"] + self._learning_rate = self.cfg["learning_rate"] self._learning_rate_scheduler = self.cfg["learning_rate_scheduler"] self._state_preprocessor = self.cfg["state_preprocessor"] @@ -119,13 +118,15 @@ def __init__(self, self._rewards_shaper = self.cfg["rewards_shaper"] - # set up optimizers and learning rate schedulers + # set up optimizer and learning rate scheduler if self.policy is not None and self.value is not None: - self.policy_optimizer = torch.optim.Adam(self.policy.parameters(), lr=self._policy_learning_rate) - self.value_optimizer = torch.optim.Adam(self.value.parameters(), lr=self._value_learning_rate) + if self.policy is self.value: + self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=self._learning_rate) + else: + self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), self.value.parameters()), + lr=self._learning_rate) if self._learning_rate_scheduler is not None: - self.policy_scheduler = self._learning_rate_scheduler(self.policy_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) - self.value_scheduler = self._learning_rate_scheduler(self.value_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) # set up preprocessors self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ @@ -157,8 +158,7 @@ def init(self) -> None: def act(self, states: torch.Tensor, timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -167,8 +167,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -178,10 +176,10 @@ def act(self, # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample stochastic actions - return self.policy.act(states, inference=inference) + return self.policy.act(states, taken_actions=None, role="policy") def record_transition(self, states: torch.Tensor, @@ -220,7 +218,8 @@ def record_transition(self, self._current_next_states = next_states if self.memory is not None: - values, _, _ = self.value.act(states=self._state_preprocessor(states), inference=True) + with torch.no_grad(): + values, _, _ = self.value.act(self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, @@ -306,8 +305,8 @@ def compute_gae(rewards: torch.Tensor, return returns, advantages # compute returns and advantages - last_values, _, _ = self.value.act(states=self._state_preprocessor(self._current_next_states.float() \ - if not torch.is_floating_point(self._current_next_states) else self._current_next_states), inference=True) + with torch.no_grad(): + last_values, _, _ = 
self.value.act(self._state_preprocessor(self._current_next_states.float()), taken_actions=None, role="value") last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") @@ -334,35 +333,31 @@ def compute_gae(rewards: torch.Tensor, sampled_states = self._state_preprocessor(sampled_states, train=True) - _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions) + _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions, role="policy") # compute entropy loss if self._entropy_loss_scale: - entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy().mean() + entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy(role="policy").mean() else: entropy_loss = 0 # compute policy loss policy_loss = -(sampled_advantages * next_log_prob).mean() - # optimization step (policy) - self.policy_optimizer.zero_grad() - (policy_loss + entropy_loss).backward() - if self._grad_norm_clip > 0: - nn.utils.clip_grad_norm_(self.policy.parameters(), self._grad_norm_clip) - self.policy_optimizer.step() - # compute value loss - predicted_values, _, _ = self.value.act(states=sampled_states) + predicted_values, _, _ = self.value.act(states=sampled_states, taken_actions=None, role="value") value_loss = F.mse_loss(sampled_returns, predicted_values) - # optimization step (value) - self.value_optimizer.zero_grad() - value_loss.backward() + # optimization step + self.optimizer.zero_grad() + (policy_loss + entropy_loss + value_loss).backward() if self._grad_norm_clip > 0: - nn.utils.clip_grad_norm_(self.value.parameters(), self._grad_norm_clip) - self.value_optimizer.step() + if self.policy is self.value: + nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=self._grad_norm_clip) + else: + nn.utils.clip_grad_norm_(itertools.chain(self.policy.parameters(), self.value.parameters()), self._grad_norm_clip) + self.optimizer.step() # update cumulative losses cumulative_policy_loss += policy_loss.item() @@ -372,8 +367,7 @@ def compute_gae(rewards: torch.Tensor, # update learning rate if self._learning_rate_scheduler: - self.policy_scheduler.step() - self.value_scheduler.step() + self.scheduler.step() # record data self.track_data("Loss / Policy loss", cumulative_policy_loss / len(sampled_batches)) @@ -382,8 +376,7 @@ def compute_gae(rewards: torch.Tensor, if self._entropy_loss_scale: self.track_data("Loss / Entropy loss", cumulative_entropy_loss / len(sampled_batches)) - self.track_data("Policy / Standard deviation", self.policy.distribution().stddev.mean().item()) + self.track_data("Policy / Standard deviation", self.policy.distribution(role="policy").stddev.mean().item()) if self._learning_rate_scheduler: - self.track_data("Learning / Policy learning rate", self.policy_scheduler.get_last_lr()[0]) - self.track_data("Learning / Value learning rate", self.value_scheduler.get_last_lr()[0]) + self.track_data("Learning / Learning rate", self.scheduler.get_last_lr()[0]) From c5bef1fea92fb7c3c56ea4c18828466930f48733 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 5 Sep 2022 18:14:27 +0200 Subject: [PATCH 048/108] Update A2C agent in docs --- docs/source/modules/skrl.agents.a2c.rst | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/source/modules/skrl.agents.a2c.rst b/docs/source/modules/skrl.agents.a2c.rst index da63d444..7413838e 100644 --- a/docs/source/modules/skrl.agents.a2c.rst +++ 
b/docs/source/modules/skrl.agents.a2c.rst @@ -8,6 +8,10 @@ Paper: `Asynchronous Methods for Deep Reinforcement Learning Date: Mon, 5 Sep 2022 20:04:56 +0200 Subject: [PATCH 049/108] Pass taken_action and role arguments to each model --- skrl/agents/torch/a2c/a2c.py | 7 ++-- skrl/agents/torch/amp/amp.py | 37 +++++++++----------- skrl/agents/torch/cem/cem.py | 14 +++----- skrl/agents/torch/ddpg/ddpg.py | 22 +++++------- skrl/agents/torch/dqn/ddqn.py | 23 +++++-------- skrl/agents/torch/dqn/dqn.py | 19 ++++------ skrl/agents/torch/ppo/ppo.py | 40 +++++++++++----------- skrl/agents/torch/q_learning/q_learning.py | 12 ++----- skrl/agents/torch/sac/sac.py | 28 ++++++--------- skrl/agents/torch/sarsa/sarsa.py | 14 +++----- skrl/agents/torch/td3/td3.py | 26 ++++++-------- skrl/agents/torch/trpo/trpo.py | 29 +++++++--------- 12 files changed, 106 insertions(+), 165 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index b51d2479..02dec6da 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -155,10 +155,7 @@ def init(self) -> None: # create temporary variables needed for storage and computation self._current_next_states = None - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -354,7 +351,7 @@ def compute_gae(rewards: torch.Tensor, (policy_loss + entropy_loss + value_loss).backward() if self._grad_norm_clip > 0: if self.policy is self.value: - nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=self._grad_norm_clip) + nn.utils.clip_grad_norm_(self.policy.parameters(), self._grad_norm_clip) else: nn.utils.clip_grad_norm_(itertools.chain(self.policy.parameters(), self.value.parameters()), self._grad_norm_clip) self.optimizer.step() diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 1b1df03d..4664fcbb 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -231,11 +231,7 @@ def init(self) -> None: self._current_log_prob = None self._current_states = None - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -244,8 +240,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -259,10 +253,10 @@ def act(self, # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample stochastic actions - actions, log_prob, actions_mean = self.policy.act(states, inference=inference) + actions, log_prob, actions_mean = self.policy.act(states, taken_actions=None, role="policy") self._current_log_prob = log_prob return actions, log_prob, actions_mean @@ -308,10 +302,12 @@ def record_transition(self, amp_states = infos["amp_obs"] if self.memory is not None: - values, _, _ = self.value.act(states=self._state_preprocessor(states), inference=True) + with 
torch.no_grad(): + values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) - next_values, _, _ = self.value.act(states=self._state_preprocessor(next_states), inference=True) + with torch.no_grad(): + next_values, _, _ = self.value.act(states=self._state_preprocessor(next_states), taken_actions=None, role="value") next_values = self._value_preprocessor(next_values, inverse=True) next_values *= infos['terminate'].view(-1, 1).logical_not() @@ -405,7 +401,7 @@ def compute_gae(rewards: torch.Tensor, amp_states = self.memory.get_tensor_by_name("amp_states") with torch.no_grad(): - amp_logits, _, _ = self.discriminator.act(self._amp_state_preprocessor(amp_states)) + amp_logits, _, _ = self.discriminator.act(self._amp_state_preprocessor(amp_states), taken_actions=None, role="discriminator") style_reward = -torch.log(torch.maximum(1 - 1 / (1 + torch.exp(-amp_logits)), torch.tensor(0.0001, device=self.device))) style_reward *= self._discriminator_reward_scale @@ -452,11 +448,11 @@ def compute_gae(rewards: torch.Tensor, sampled_states = self._state_preprocessor(sampled_states, train=True) - _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions) + _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions, role="policy") # compute entropy loss if self._entropy_loss_scale: - entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy().mean() + entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy(role="policy").mean() else: entropy_loss = 0 @@ -468,7 +464,7 @@ def compute_gae(rewards: torch.Tensor, policy_loss = -torch.min(surrogate, surrogate_clipped).mean() # compute value loss - predicted_values, _, _ = self.value.act(states=sampled_states) + predicted_values, _, _ = self.value.act(states=sampled_states, taken_actions=None, role="value") if self._clip_predicted_values: predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, @@ -489,9 +485,9 @@ def compute_gae(rewards: torch.Tensor, sampled_amp_motion_states = self._amp_state_preprocessor(sampled_motion_batches[batch_index][0], train=True) sampled_amp_motion_states.requires_grad_(True) - amp_logits, _, _ = self.discriminator.act(states=sampled_amp_states) - amp_replay_logits, _, _ = self.discriminator.act(states=sampled_amp_replay_states) - amp_motion_logits, _, _ = self.discriminator.act(states=sampled_amp_motion_states) + amp_logits, _, _ = self.discriminator.act(states=sampled_amp_states, taken_actions=None, role="discriminator") + amp_replay_logits, _, _ = self.discriminator.act(states=sampled_amp_replay_states, taken_actions=None, role="discriminator") + amp_motion_logits, _, _ = self.discriminator.act(states=sampled_amp_motion_states, taken_actions=None, role="discriminator") amp_cat_logits = torch.cat([amp_logits, amp_replay_logits], dim=0) @@ -530,8 +526,7 @@ def compute_gae(rewards: torch.Tensor, if self._grad_norm_clip > 0: nn.utils.clip_grad_norm_(itertools.chain(self.policy.parameters(), self.value.parameters(), - self.discriminator.parameters()), - max_norm=self._grad_norm_clip) + self.discriminator.parameters()), self._grad_norm_clip) self.optimizer.step() # update cumulative losses @@ -555,7 +550,7 @@ def compute_gae(rewards: torch.Tensor, self.track_data("Loss / Entropy loss", cumulative_entropy_loss / (self._learning_epochs * self._mini_batches)) self.track_data("Loss / Discriminator loss", 
cumulative_discriminator_loss / (self._learning_epochs * self._mini_batches)) - self.track_data("Policy / Standard deviation", self.policy.distribution().stddev.mean().item()) + self.track_data("Policy / Standard deviation", self.policy.distribution(role="policy").stddev.mean().item()) if self._learning_rate_scheduler: self.track_data("Learning / Learning rate", self.scheduler.get_last_lr()[0]) diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index b2b831d8..35a40da2 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -129,11 +129,7 @@ def init(self) -> None: self.tensors_names = ["states", "actions", "rewards", "next_states", "dones"] - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -142,8 +138,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -153,10 +147,10 @@ def act(self, # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample stochastic actions - return self.policy.act(states, inference=inference) + return self.policy.act(states, taken_actions=None, role="policy") def record_transition(self, states: torch.Tensor, @@ -269,7 +263,7 @@ def _update(self, timestep: int, timesteps: int) -> None: elite_actions = torch.cat([sampled_actions[limits[i][0]:limits[i][1]] for i in indexes[:, 0]], dim=0) # compute scores for the elite states - scores = self.policy.act(elite_states)[2] + scores = self.policy.act(elite_states, taken_actions=None, role="policy")[2] # compute policy loss policy_loss = F.cross_entropy(scores, elite_actions.view(-1)) diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index d5e9e5b8..e829aef8 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -163,11 +163,7 @@ def init(self) -> None: # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -176,8 +172,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -186,10 +180,10 @@ def act(self, # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample deterministic actions - actions = self.policy.act(states, inference=inference) + actions = self.policy.act(states, taken_actions=None, role="policy") # add exloration noise if self._exploration_noise is not None: 
@@ -313,13 +307,13 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_actions, _, _ = self.target_policy.act(states=sampled_next_states) + next_actions, _, _ = self.target_policy.act(states=sampled_next_states, taken_actions=None, role="target_policy") - target_q_values, _, _ = self.target_critic.act(states=sampled_next_states, taken_actions=next_actions) + target_q_values, _, _ = self.target_critic.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic") target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute critic loss - critic_values, _, _ = self.critic.act(states=sampled_states, taken_actions=sampled_actions) + critic_values, _, _ = self.critic.act(states=sampled_states, taken_actions=sampled_actions, role="critic") critic_loss = F.mse_loss(critic_values, target_values) @@ -329,8 +323,8 @@ def _update(self, timestep: int, timesteps: int) -> None: self.critic_optimizer.step() # compute policy (actor) loss - actions, _, _ = self.policy.act(states=sampled_states) - critic_values, _, _ = self.critic.act(states=sampled_states, taken_actions=actions) + actions, _, _ = self.policy.act(states=sampled_states, taken_actions=None, role="policy") + critic_values, _, _ = self.critic.act(states=sampled_states, taken_actions=actions, role="critic") policy_loss = -critic_values.mean() diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 94a44bc0..4655531f 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -153,11 +153,7 @@ def init(self) -> None: self.tensors_names = ["states", "actions", "rewards", "next_states", "dones"] - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -166,8 +162,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -175,10 +169,10 @@ def act(self, states = self._state_preprocessor(states) if not self._exploration_timesteps: - return torch.argmax(self.q_network.act(states, inference=inference)[0], dim=1, keepdim=True), None, None + return torch.argmax(self.q_network.act(states, taken_actions=None, role="q_network")[0], dim=1, keepdim=True), None, None # sample random actions - actions = self.q_network.random_act(states)[0] + actions = self.q_network.random_act(states, taken_actions=None, role="q_network")[0] if timestep < self._random_timesteps: return actions, None, None @@ -188,7 +182,7 @@ def act(self, indexes = (torch.rand(states.shape[0], device=self.device) >= epsilon).nonzero().view(-1) if indexes.numel(): - actions[indexes] = torch.argmax(self.q_network.act(states[indexes], inference=inference)[0], dim=1, keepdim=True) + actions[indexes] = torch.argmax(self.q_network.act(states[indexes], taken_actions=None, role="q_network")[0], dim=1, keepdim=True) # record epsilon self.track_data("Exploration / Exploration epsilon", epsilon) @@ -278,14 +272,15 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_q_values, _, _ = 
self.target_q_network.act(states=sampled_next_states) + next_q_values, _, _ = self.target_q_network.act(states=sampled_next_states, taken_actions=None, role="target_q_network") - target_q_values = torch.gather(next_q_values, dim=1, \ - index=torch.argmax(self.q_network.act(states=sampled_next_states)[0], dim=1, keepdim=True)) + target_q_values = torch.gather(next_q_values, dim=1, index=torch.argmax(self.q_network.act(states=sampled_next_states, \ + taken_actions=None, role="q_network")[0], dim=1, keepdim=True)) target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute Q-network loss - q_values = torch.gather(self.q_network.act(states=sampled_states)[0], dim=1, index=sampled_actions.long()) + q_values = torch.gather(self.q_network.act(states=sampled_states, taken_actions=None, role="q_network")[0], + dim=1, index=sampled_actions.long()) q_network_loss = F.mse_loss(q_values, target_values) diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index ba6c544f..9c77d947 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -153,11 +153,7 @@ def init(self) -> None: self.tensors_names = ["states", "actions", "rewards", "next_states", "dones"] - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -166,8 +162,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -175,10 +169,10 @@ def act(self, states = self._state_preprocessor(states) if not self._exploration_timesteps: - return torch.argmax(self.q_network.act(states, inference=inference)[0], dim=1, keepdim=True), None, None + return torch.argmax(self.q_network.act(states, taken_actions=None, role="q_network")[0], dim=1, keepdim=True), None, None # sample random actions - actions = self.q_network.random_act(states)[0] + actions = self.q_network.random_act(states, taken_actions=None, role="q_network")[0] if timestep < self._random_timesteps: return actions, None, None @@ -188,7 +182,7 @@ def act(self, indexes = (torch.rand(states.shape[0], device=self.device) >= epsilon).nonzero().view(-1) if indexes.numel(): - actions[indexes] = torch.argmax(self.q_network.act(states[indexes], inference=inference)[0], dim=1, keepdim=True) + actions[indexes] = torch.argmax(self.q_network.act(states[indexes], taken_actions=None, role="q_network")[0], dim=1, keepdim=True) # record epsilon self.track_data("Exploration / Exploration epsilon", epsilon) @@ -278,13 +272,14 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_q_values, _, _ = self.target_q_network.act(states=sampled_next_states) + next_q_values, _, _ = self.target_q_network.act(states=sampled_next_states, taken_actions=None, role="target_q_network") target_q_values = torch.max(next_q_values, dim=-1, keepdim=True)[0] target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute Q-network loss - q_values = torch.gather(self.q_network.act(states=sampled_states)[0], dim=1, index=sampled_actions.long()) + q_values = 
torch.gather(self.q_network.act(states=sampled_states, taken_actions=None, role="q_network")[0], + dim=1, index=sampled_actions.long()) q_network_loss = F.mse_loss(q_values, target_values) diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 560efdfd..a04c06b9 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -135,8 +135,11 @@ def __init__(self, # set up optimizer and learning rate scheduler if self.policy is not None and self.value is not None: - self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), self.value.parameters()), - lr=self._learning_rate) + if self.policy is self.value: + self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=self._learning_rate) + else: + self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), self.value.parameters()), + lr=self._learning_rate) if self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) @@ -169,11 +172,7 @@ def init(self) -> None: self._current_log_prob = None self._current_next_states = None - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -182,8 +181,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -193,10 +190,10 @@ def act(self, # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample stochastic actions - actions, log_prob, actions_mean = self.policy.act(states, inference=inference) + actions, log_prob, actions_mean = self.policy.act(states, taken_actions=None, role="policy") self._current_log_prob = log_prob return actions, log_prob, actions_mean @@ -238,7 +235,8 @@ def record_transition(self, self._current_next_states = next_states if self.memory is not None: - values, _, _ = self.value.act(states=self._state_preprocessor(states), inference=True) + with torch.no_grad(): + values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, @@ -324,8 +322,8 @@ def compute_gae(rewards: torch.Tensor, return returns, advantages # compute returns and advantages - last_values, _, _ = self.value.act(states=self._state_preprocessor(self._current_next_states.float() \ - if not torch.is_floating_point(self._current_next_states) else self._current_next_states), inference=True) + with torch.no_grad(): + last_values, _, _ = self.value.act(self._state_preprocessor(self._current_next_states.float()), taken_actions=None, role="value") last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") @@ -357,7 +355,7 @@ def compute_gae(rewards: torch.Tensor, sampled_states = self._state_preprocessor(sampled_states, train=not epoch) - _, next_log_prob, _ = 
self.policy.act(states=sampled_states, taken_actions=sampled_actions) + _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions, role="policy") # compute aproximate KL divergence with torch.no_grad(): @@ -371,7 +369,7 @@ def compute_gae(rewards: torch.Tensor, # compute entropy loss if self._entropy_loss_scale: - entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy().mean() + entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy(role="policy").mean() else: entropy_loss = 0 @@ -383,7 +381,7 @@ def compute_gae(rewards: torch.Tensor, policy_loss = -torch.min(surrogate, surrogate_clipped).mean() # compute value loss - predicted_values, _, _ = self.value.act(states=sampled_states) + predicted_values, _, _ = self.value.act(states=sampled_states, taken_actions=None, role="value") if self._clip_predicted_values: predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, @@ -395,8 +393,10 @@ def compute_gae(rewards: torch.Tensor, self.optimizer.zero_grad() (policy_loss + entropy_loss + value_loss).backward() if self._grad_norm_clip > 0: - nn.utils.clip_grad_norm_(itertools.chain(self.policy.parameters(), self.value.parameters()), - max_norm=self._grad_norm_clip) + if self.policy is self.value: + nn.utils.clip_grad_norm_(self.policy.parameters(), self._grad_norm_clip) + else: + nn.utils.clip_grad_norm_(itertools.chain(self.policy.parameters(), self.value.parameters()), self._grad_norm_clip) self.optimizer.step() # update cumulative losses @@ -418,7 +418,7 @@ def compute_gae(rewards: torch.Tensor, if self._entropy_loss_scale: self.track_data("Loss / Entropy loss", cumulative_entropy_loss / (self._learning_epochs * self._mini_batches)) - self.track_data("Policy / Standard deviation", self.policy.distribution().stddev.mean().item()) + self.track_data("Policy / Standard deviation", self.policy.distribution(role="policy").stddev.mean().item()) if self._learning_rate_scheduler: self.track_data("Learning / Learning rate", self.scheduler.get_last_lr()[0]) diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 91fb3bbb..67b74029 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -98,11 +98,7 @@ def init(self) -> None: """ super().init() - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -111,18 +107,16 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor """ # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample actions from policy - return self.policy.act(states, inference=inference) + return self.policy.act(states, taken_actions=None, role="policy") def record_transition(self, states: torch.Tensor, diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 4f3a9d57..b4ca788a 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -166,11 +166,7 @@ def init(self) -> None: self.tensors_names = 
["states", "actions", "rewards", "next_states", "dones"] - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -179,8 +175,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -190,10 +184,10 @@ def act(self, # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample stochastic actions - return self.policy.act(states, inference=inference) + return self.policy.act(states, taken_actions=None, role="policy") def record_transition(self, states: torch.Tensor, @@ -278,16 +272,16 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_actions, next_log_prob, _ = self.policy.act(states=sampled_next_states) + next_actions, next_log_prob, _ = self.policy.act(states=sampled_next_states, taken_actions=None, role="policy") - target_q1_values, _, _ = self.target_critic_1.act(states=sampled_next_states, taken_actions=next_actions) - target_q2_values, _, _ = self.target_critic_2.act(states=sampled_next_states, taken_actions=next_actions) + target_q1_values, _, _ = self.target_critic_1.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic_1") + target_q2_values, _, _ = self.target_critic_2.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic_2") target_q_values = torch.min(target_q1_values, target_q2_values) - self._entropy_coefficient * next_log_prob target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute critic loss - critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=sampled_actions) - critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=sampled_actions) + critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=sampled_actions, role="critic_1") + critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=sampled_actions, role="critic_2") critic_loss = (F.mse_loss(critic_1_values, target_values) + F.mse_loss(critic_2_values, target_values)) / 2 @@ -297,9 +291,9 @@ def _update(self, timestep: int, timesteps: int) -> None: self.critic_optimizer.step() # compute policy (actor) loss - actions, log_prob, _ = self.policy.act(states=sampled_states) - critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=actions) - critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=actions) + actions, log_prob, _ = self.policy.act(states=sampled_states, taken_actions=None, role="policy") + critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=actions, role="critic_1") + critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=actions, role="critic_2") policy_loss = (self._entropy_coefficient * log_prob - torch.min(critic_1_values, critic_2_values)).mean() diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index d0710084..4c9f9abb 
100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -98,11 +98,7 @@ def init(self) -> None: """ super().init() - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -111,18 +107,16 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor """ # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample actions from policy - return self.policy.act(states, inference=inference) + return self.policy.act(states, taken_actions=None, role="policy") def record_transition(self, states: torch.Tensor, @@ -205,7 +199,7 @@ def _update(self, timestep: int, timesteps: int) -> None: env_ids = torch.arange(self._current_rewards.shape[0]).view(-1, 1) # compute next actions - next_actions = self.policy.act(self._current_next_states)[0] + next_actions = self.policy.act(self._current_next_states, taken_actions=None, role="policy")[0] # update Q-table q_table[env_ids, self._current_states, self._current_actions] += self._learning_rate \ diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 572605f0..73eeacdb 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -179,11 +179,7 @@ def init(self) -> None: # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -192,8 +188,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -202,10 +196,10 @@ def act(self, # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample deterministic actions - actions = self.policy.act(states, inference=inference) + actions = self.policy.act(states, taken_actions=None, role="policy") # add noise if self._exploration_noise is not None: @@ -330,7 +324,7 @@ def _update(self, timestep: int, timesteps: int) -> None: with torch.no_grad(): # target policy smoothing - next_actions, _, _ = self.target_policy.act(states=sampled_next_states) + next_actions, _, _ = self.target_policy.act(states=sampled_next_states, taken_actions=None, role="target_policy") noises = torch.clamp(self._smooth_regularization_noise.sample(next_actions.shape), min=-self._smooth_regularization_clip, max=self._smooth_regularization_clip) @@ -342,14 +336,14 @@ def _update(self, timestep: int, timesteps: int) -> None: next_actions.clamp_(min=self.clip_actions_min, 
max=self.clip_actions_max) # compute target values - target_q1_values, _, _ = self.target_critic_1.act(states=sampled_next_states, taken_actions=next_actions) - target_q2_values, _, _ = self.target_critic_2.act(states=sampled_next_states, taken_actions=next_actions) + target_q1_values, _, _ = self.target_critic_1.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic_1") + target_q2_values, _, _ = self.target_critic_2.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic_2") target_q_values = torch.min(target_q1_values, target_q2_values) target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute critic loss - critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=sampled_actions) - critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=sampled_actions) + critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=sampled_actions, role="critic_1") + critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=sampled_actions, role="critic_2") critic_loss = F.mse_loss(critic_1_values, target_values) + F.mse_loss(critic_2_values, target_values) @@ -363,8 +357,8 @@ def _update(self, timestep: int, timesteps: int) -> None: if not self._critic_update_counter % self._policy_delay: # compute policy (actor) loss - actions, _, _ = self.policy.act(states=sampled_states) - critic_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=actions) + actions, _, _ = self.policy.act(states=sampled_states, taken_actions=None, role="policy") + critic_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=actions, role="critic_1") policy_loss = -critic_values.mean() diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index ce78c89f..aead3d07 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -169,11 +169,7 @@ def init(self) -> None: self._current_log_prob = None self._current_next_states = None - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -182,8 +178,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :return: Actions :rtype: torch.Tensor @@ -193,10 +187,10 @@ def act(self, # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states) + return self.policy.random_act(states, taken_actions=None, role="policy") # sample stochastic actions - actions, log_prob, actions_mean = self.policy.act(states, inference=inference) + actions, log_prob, actions_mean = self.policy.act(states, taken_actions=None, role="policy") self._current_log_prob = log_prob return actions, log_prob, actions_mean @@ -238,7 +232,8 @@ def record_transition(self, self._current_next_states = next_states if self.memory is not None: - values, _, _ = self.value.act(states=self._state_preprocessor(states), inference=True) + with torch.no_grad(): + values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = 
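A schematic illustration of the role-tagged act() convention used by the calls above, where the same instance can be queried as "policy", "critic_1", "target_critic_1", and so on. RoleAwareModel, its sizes, and its return values are invented stand-ins for illustration only; this is not the library's Model base class.

import torch
import torch.nn as nn

class RoleAwareModel(nn.Module):
    """Toy model whose behavior can depend on the role it is queried with."""
    def __init__(self, num_observations=4, num_actions=2):
        super().__init__()
        self.net = nn.Linear(num_observations, num_actions)

    def act(self, states, taken_actions=None, role=""):
        output = self.net(states)
        if role.startswith("target"):
            # e.g. target networks are queried without tracking gradients
            output = output.detach()
        return output, None, {}

model = RoleAwareModel()
states = torch.randn(8, 4)
actions, _, _ = model.act(states, taken_actions=None, role="policy")
frozen, _, _ = model.act(states, taken_actions=None, role="target_critic_1")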
self._value_preprocessor(values, inverse=True) self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, @@ -342,7 +337,7 @@ def surrogate_loss(policy: Model, :return: Surrogate loss :rtype: torch.Tensor """ - _, new_log_prob, _ = policy.act(states, actions) + _, new_log_prob, _ = policy.act(states, taken_actions=actions, role="policy") return (advantages * torch.exp(new_log_prob - log_prob.detach())).mean() def conjugate_gradient(policy: Model, @@ -426,11 +421,11 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor :return: KL divergence :rtype: torch.Tensor """ - _, _, mu_1 = policy_1.act(states) + _, _, mu_1 = policy_1.act(states, taken_actions=None, role="policy") logstd_1 = policy_1.get_log_std() mu_1, logstd_1 = mu_1.detach(), logstd_1.detach() - _, _, mu_2 = policy_2.act(states) + _, _, mu_2 = policy_2.act(states, taken_actions=None, role="policy") logstd_2 = policy_2.get_log_std() kl = logstd_1 - logstd_2 + 0.5 * (torch.square(logstd_1.exp()) + torch.square(mu_1 - mu_2)) \ @@ -438,8 +433,8 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor return torch.sum(kl, dim=-1).mean() # compute returns and advantages - last_values, _, _ = self.value.act(states=self._state_preprocessor(self._current_next_states.float() \ - if not torch.is_floating_point(self._current_next_states) else self._current_next_states), inference=True) + with torch.no_grad(): + last_values, _, _ = self.value.act(self._state_preprocessor(self._current_next_states.float()), taken_actions=None, role="value") last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") @@ -506,7 +501,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor self.policy.update_parameters(self.backup_policy) # compute value loss - predicted_values, _, _ = self.value.act(sampled_states) + predicted_values, _, _ = self.value.act(sampled_states, taken_actions=None, role="value") value_loss = self._value_loss_scale * F.mse_loss(sampled_returns, predicted_values) @@ -529,7 +524,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor self.track_data("Loss / Policy loss", cumulative_policy_loss / (self._learning_epochs * self._mini_batches)) self.track_data("Loss / Value loss", cumulative_value_loss / (self._learning_epochs * self._mini_batches)) - self.track_data("Policy / Standard deviation", self.policy.distribution().stddev.mean().item()) + self.track_data("Policy / Standard deviation", self.policy.distribution(role="policy").stddev.mean().item()) if self._learning_rate_scheduler: self.track_data("Learning / Value learning rate", self.value_scheduler.get_last_lr()[0]) From 0de03ec102b8bffd685c8d8ed1c454ecd68d8d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 6 Sep 2022 11:12:24 +0200 Subject: [PATCH 050/108] Update checkpoint mechanism to storage optimizers and preprocessors --- skrl/agents/torch/a2c/a2c.py | 22 ++++++--- skrl/agents/torch/amp/amp.py | 31 +++++++++---- skrl/agents/torch/base.py | 54 +++++++++++++--------- skrl/agents/torch/cem/cem.py | 13 ++++-- skrl/agents/torch/ddpg/ddpg.py | 17 +++++-- skrl/agents/torch/dqn/ddqn.py | 16 +++++-- skrl/agents/torch/dqn/dqn.py | 14 ++++-- skrl/agents/torch/ppo/ppo.py | 22 ++++++--- skrl/agents/torch/q_learning/q_learning.py | 4 +- skrl/agents/torch/sac/sac.py | 20 ++++++-- skrl/agents/torch/sarsa/sarsa.py | 4 +- 
skrl/agents/torch/td3/td3.py | 19 ++++++-- skrl/agents/torch/trpo/trpo.py | 22 ++++++--- 13 files changed, 182 insertions(+), 76 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 02dec6da..64d407fa 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -45,7 +45,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -93,7 +93,8 @@ def __init__(self, self.value = self.models.get("value", None) # checkpoint models - self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["policy"] = self.policy + self.checkpoint_modules["value"] = self.value # configuration self._mini_batches = self.cfg["mini_batches"] @@ -128,11 +129,20 @@ def __init__(self, if self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["optimizer"] = self.optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor - self._value_preprocessor = self._value_preprocessor(**self.cfg["value_preprocessor_kwargs"]) if self._value_preprocessor \ - else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor + + if self._value_preprocessor: + self._value_preprocessor = self._value_preprocessor(**self.cfg["value_preprocessor_kwargs"]) + self.checkpoint_modules["value_preprocessor"] = self._value_preprocessor + else: + self._value_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 4664fcbb..e9b2264f 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -63,7 +63,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -136,7 +136,9 @@ def __init__(self, self.discriminator = self.models.get("discriminator", None) # checkpoint models - self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["policy"] = self.policy + self.checkpoint_modules["value"] = self.value + self.checkpoint_modules["discriminator"] = self.discriminator # configuration self._learning_epochs = self.cfg["learning_epochs"] @@ -187,13 +189,26 @@ def __init__(self, if self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["optimizer"] = self.optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor - self._value_preprocessor = 
self._value_preprocessor(**self.cfg["value_preprocessor_kwargs"]) if self._value_preprocessor \ - else self._empty_preprocessor - self._amp_state_preprocessor = self._amp_state_preprocessor(**self.cfg["amp_state_preprocessor_kwargs"]) \ - if self._amp_state_preprocessor else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor + + if self._value_preprocessor: + self._value_preprocessor = self._value_preprocessor(**self.cfg["value_preprocessor_kwargs"]) + self.checkpoint_modules["value_preprocessor"] = self._value_preprocessor + else: + self._value_preprocessor = self._empty_preprocessor + + if self._amp_state_preprocessor: + self._amp_state_preprocessor = self._amp_state_preprocessor(**self.cfg["amp_state_preprocessor_kwargs"]) + self.checkpoint_modules["amp_state_preprocessor"] = self._amp_state_preprocessor + else: + self._amp_state_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 87a4e8d2..f7c9ea07 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -66,10 +66,10 @@ def __init__(self, self._cumulative_timesteps = None # checkpoint - self.checkpoint_models = {} + self.checkpoint_modules = {} self.checkpoint_interval = self.cfg.get("experiment", {}).get("checkpoint_interval", 1000) - self.checkpoint_policy_only = self.cfg.get("experiment", {}).get("checkpoint_policy_only", True) - self.checkpoint_best_models = {"timestep": 0, "reward": -2 ** 31, "saved": False, "models": {}} + self.checkpoint_store_separately = self.cfg.get("experiment", {}).get("store_separately", False) + self.checkpoint_best_modules = {"timestep": 0, "reward": -2 ** 31, "saved": False, "modules": {}} def __str__(self) -> str: """Generate a representation of the agent as string @@ -164,23 +164,37 @@ def write_checkpoint(self, timestep: int, timesteps: int) -> None: :param timesteps: Number of timesteps :type timesteps: int """ - # current models - for k, model in self.checkpoint_models.items(): - name = "{}_{}".format(timestep if timestep is not None else datetime.datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f"), k) - model.save(os.path.join(self.experiment_dir, "checkpoints", "{}.pt".format(name))) + tag = str(timestep if timestep is not None else datetime.datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f")) + # separated modules + if self.checkpoint_store_separately: + for name, module in self.checkpoint_modules.items(): + torch.save(module.state_dict(), os.path.join(self.experiment_dir, "checkpoints", "{}_{}.pt".format(name, tag))) + # whole agent + else: + modules = {} + for name, module in self.checkpoint_modules.items(): + modules[name] = module.state_dict() + torch.save(modules, os.path.join(self.experiment_dir, "checkpoints", "{}_{}.pt".format("agent", tag))) # best models - if self.checkpoint_best_models["models"] and not self.checkpoint_best_models["saved"]: - for k, model in self.checkpoint_models.items(): - model.save(os.path.join(self.experiment_dir, "checkpoints", "best_{}.pt".format(k)), - state_dict=self.checkpoint_best_models["models"][k]) - self.checkpoint_best_models["saved"] = True + if self.checkpoint_best_modules["modules"] and not self.checkpoint_best_modules["saved"]: + # separated modules + if self.checkpoint_store_separately: + for name, 
module in self.checkpoint_modules.items(): + torch.save(self.checkpoint_best_modules["modules"][name], + os.path.join(self.experiment_dir, "checkpoints", "best_{}.pt".format(name))) + # whole agent + else: + modules = {} + for name, module in self.checkpoint_modules.items(): + modules[name] = self.checkpoint_best_modules["modules"][name] + torch.save(modules, os.path.join(self.experiment_dir, "checkpoints", "best_{}.pt".format("agent"))) + self.checkpoint_best_modules["saved"] = True def act(self, states: torch.Tensor, timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -189,8 +203,6 @@ def act(self, :type timestep: int :param timesteps: Number of timesteps :type timesteps: int - :param inference: Flag to indicate whether the model is making inference - :type inference: bool :raises NotImplementedError: The method is not implemented by the inheriting classes @@ -302,11 +314,11 @@ def post_interaction(self, timestep: int, timesteps: int) -> None: if timestep > 1 and self.write_interval > 0 and not timestep % self.write_interval: # update best models reward = np.mean(self.tracking_data.get("Reward / Total reward (mean)", -2 ** 31)) - if reward > self.checkpoint_best_models["reward"]: - self.checkpoint_best_models["timestep"] = timestep - self.checkpoint_best_models["reward"] = reward - self.checkpoint_best_models["saved"] = False - self.checkpoint_best_models["models"] = {k: copy.deepcopy(model.state_dict()) for k, model in self.checkpoint_models.items()} + if reward > self.checkpoint_best_modules["reward"]: + self.checkpoint_best_modules["timestep"] = timestep + self.checkpoint_best_modules["reward"] = reward + self.checkpoint_best_modules["saved"] = False + self.checkpoint_best_modules["modules"] = {k: copy.deepcopy(v.state_dict()) for k, v in self.checkpoint_modules.items()} # write to tensorboard self.write_tracking_data(timestep, timesteps) diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 35a40da2..fb655e65 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -36,7 +36,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -83,7 +83,7 @@ def __init__(self, self.policy = self.models.get("policy", None) # checkpoint models - self.checkpoint_models = self.models + self.checkpoint_modules["policy"] = self.policy # configuration: self._rollouts = self.cfg["rollouts"] @@ -110,9 +110,14 @@ def __init__(self, if self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["optimizer"] = self.optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent diff --git 
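A minimal sketch of the best-checkpoint bookkeeping visible in the base-agent diff above: deep-copy the current state dicts whenever the tracked mean reward improves, and write that snapshot to disk at most once. The track_best/write_best helpers, the reward values, and the /tmp path are assumptions made for this example.

import copy
import torch
import torch.nn as nn

policy = nn.Linear(4, 2)
best = {"reward": float("-inf"), "modules": {}, "saved": False}

def track_best(mean_reward):
    # keep a deep copy of the current weights whenever the reward improves
    if mean_reward > best["reward"]:
        best["reward"] = mean_reward
        best["modules"] = {"policy": copy.deepcopy(policy.state_dict())}
        best["saved"] = False

def write_best(path="/tmp/best_agent.pt"):
    # the snapshot is written once per improvement, at checkpoint time
    if best["modules"] and not best["saved"]:
        torch.save(best["modules"], path)
        best["saved"] = True

for mean_reward in (0.1, 0.7, 0.4):
    track_best(mean_reward)
write_best()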
a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index e829aef8..703dcd10 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -45,7 +45,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -95,7 +95,10 @@ def __init__(self, self.target_critic = self.models.get("target_critic", None) # checkpoint models - self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["policy"] = self.policy + self.checkpoint_modules["target_policy"] = self.target_policy + self.checkpoint_modules["critic"] = self.critic + self.checkpoint_modules["target_critic"] = self.target_critic if self.target_policy is not None and self.target_critic is not None: # freeze target networks with respect to optimizers (update via .update_parameters()) @@ -137,9 +140,15 @@ def __init__(self, self.policy_scheduler = self._learning_rate_scheduler(self.policy_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) self.critic_scheduler = self._learning_rate_scheduler(self.critic_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["policy_optimizer"] = self.policy_optimizer + self.checkpoint_modules["critic_optimizer"] = self.critic_optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 4655531f..be0affb9 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -47,7 +47,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -95,8 +95,9 @@ def __init__(self, self.target_q_network = self.models.get("target_q_network", None) # checkpoint models - self.checkpoint_models = {"q_network": self.q_network} if self.checkpoint_policy_only else self.models - + self.checkpoint_modules["q_network"] = self.q_network + self.checkpoint_modules["target_q_network"] = self.target_q_network + if self.target_q_network is not None: # freeze target networks with respect to optimizers (update via .update_parameters()) self.target_q_network.freeze_parameters(True) @@ -134,9 +135,14 @@ def __init__(self, if self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["optimizer"] = self.optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + 
self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index 9c77d947..a3cb03b2 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -47,7 +47,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -95,7 +95,8 @@ def __init__(self, self.target_q_network = self.models.get("target_q_network", None) # checkpoint models - self.checkpoint_models = {"q_network": self.q_network} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["q_network"] = self.q_network + self.checkpoint_modules["target_q_network"] = self.target_q_network if self.target_q_network is not None: # freeze target networks with respect to optimizers (update via .update_parameters()) @@ -134,9 +135,14 @@ def __init__(self, if self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["optimizer"] = self.optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index a04c06b9..9581c362 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -53,7 +53,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -101,7 +101,8 @@ def __init__(self, self.value = self.models.get("value", None) # checkpoint models - self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["policy"] = self.policy + self.checkpoint_modules["value"] = self.value # configuration self._learning_epochs = self.cfg["learning_epochs"] @@ -143,11 +144,20 @@ def __init__(self, if self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["optimizer"] = self.optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor - self._value_preprocessor = self._value_preprocessor(**self.cfg["value_preprocessor_kwargs"]) if self._value_preprocessor \ - else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor 
+ + if self._value_preprocessor: + self._value_preprocessor = self._value_preprocessor(**self.cfg["value_preprocessor_kwargs"]) + self.checkpoint_modules["value_preprocessor"] = self._value_preprocessor + else: + self._value_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 67b74029..95f4a9a5 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -27,7 +27,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -74,7 +74,7 @@ def __init__(self, self.policy = self.models.get("policy", None) # checkpoint models - self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["policy"] = self.policy # configuration self._discount_factor = self.cfg["discount_factor"] diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index b4ca788a..886c6f25 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -45,7 +45,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -96,7 +96,11 @@ def __init__(self, self.target_critic_2 = self.models.get("target_critic_2", None) # checkpoint models - self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["policy"] = self.policy + self.checkpoint_modules["critic_1"] = self.critic_1 + self.checkpoint_modules["critic_2"] = self.critic_2 + self.checkpoint_modules["target_critic_1"] = self.target_critic_1 + self.checkpoint_modules["target_critic_2"] = self.target_critic_2 if self.target_critic_1 is not None and self.target_critic_2 is not None: # freeze target networks with respect to optimizers (update via .update_parameters()) @@ -138,6 +142,8 @@ def __init__(self, self.log_entropy_coefficient = torch.log(torch.ones(1, device=self.device) * self._entropy_coefficient).requires_grad_(True) self.entropy_optimizer = torch.optim.Adam([self.log_entropy_coefficient], lr=self._entropy_learning_rate) + self.checkpoint_modules["entropy_optimizer"] = self.entropy_optimizer + # set up optimizers and learning rate schedulers if self.policy is not None and self.critic_1 is not None and self.critic_2 is not None: self.policy_optimizer = torch.optim.Adam(self.policy.parameters(), lr=self._actor_learning_rate) @@ -147,9 +153,15 @@ def __init__(self, self.policy_scheduler = self._learning_rate_scheduler(self.policy_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) self.critic_scheduler = self._learning_rate_scheduler(self.critic_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["policy_optimizer"] = self.policy_optimizer + self.checkpoint_modules["critic_optimizer"] = self.critic_optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = 
self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index 4c9f9abb..9871fe8c 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -27,7 +27,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -74,7 +74,7 @@ def __init__(self, self.policy = self.models.get("policy", None) # checkpoint models - self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["policy"] = self.policy # configuration self._discount_factor = self.cfg["discount_factor"] diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 73eeacdb..c3c66442 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -50,7 +50,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -102,7 +102,12 @@ def __init__(self, self.target_critic_2 = self.models.get("target_critic_2", None) # checkpoint models - self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["policy"] = self.policy + self.checkpoint_modules["target_policy"] = self.target_policy + self.checkpoint_modules["critic_1"] = self.critic_1 + self.checkpoint_modules["critic_2"] = self.critic_2 + self.checkpoint_modules["target_critic_1"] = self.target_critic_1 + self.checkpoint_modules["target_critic_2"] = self.target_critic_2 if self.target_policy is not None and self.target_critic_1 is not None and self.target_critic_2 is not None: # freeze target networks with respect to optimizers (update via .update_parameters()) @@ -153,9 +158,15 @@ def __init__(self, self.policy_scheduler = self._learning_rate_scheduler(self.policy_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) self.critic_scheduler = self._learning_rate_scheduler(self.critic_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["policy_optimizer"] = self.policy_optimizer + self.checkpoint_modules["critic_optimizer"] = self.critic_optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index aead3d07..b06c84fe 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -53,7 +53,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - 
"checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": True, # whether to store checkpoints separately } } @@ -103,7 +103,8 @@ def __init__(self, self.backup_policy = copy.deepcopy(self.policy) # checkpoint models - self.checkpoint_models = {"policy": self.policy} if self.checkpoint_policy_only else self.models + self.checkpoint_modules["policy"] = self.policy + self.checkpoint_modules["value"] = self.value # configuration self._learning_epochs = self.cfg["learning_epochs"] @@ -141,11 +142,20 @@ def __init__(self, if self._learning_rate_scheduler is not None: self.value_scheduler = self._learning_rate_scheduler(self.value_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) + self.checkpoint_modules["value_optimizer"] = self.value_optimizer + # set up preprocessors - self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) if self._state_preprocessor \ - else self._empty_preprocessor - self._value_preprocessor = self._value_preprocessor(**self.cfg["value_preprocessor_kwargs"]) if self._value_preprocessor \ - else self._empty_preprocessor + if self._state_preprocessor: + self._state_preprocessor = self._state_preprocessor(**self.cfg["state_preprocessor_kwargs"]) + self.checkpoint_modules["state_preprocessor"] = self._state_preprocessor + else: + self._state_preprocessor = self._empty_preprocessor + + if self._value_preprocessor: + self._value_preprocessor = self._value_preprocessor(**self.cfg["value_preprocessor_kwargs"]) + self.checkpoint_modules["value_preprocessor"] = self._value_preprocessor + else: + self._value_preprocessor = self._empty_preprocessor def init(self) -> None: """Initialize the agent From 22f2a395b4ed179b364604fe2dd4e84cd228c0a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 6 Sep 2022 11:51:26 +0200 Subject: [PATCH 051/108] Remove inference argument for model acting --- skrl/trainers/torch/base.py | 4 ++-- skrl/trainers/torch/manual.py | 14 ++++---------- skrl/trainers/torch/parallel.py | 5 +---- skrl/trainers/torch/sequential.py | 10 ++-------- 4 files changed, 9 insertions(+), 24 deletions(-) diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index 538e08a1..7565b090 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -168,7 +168,7 @@ def single_agent_train(self) -> None: # compute actions with torch.no_grad(): - actions, _, _ = self.agents.act(states, inference=True, timestep=timestep, timesteps=self.timesteps) + actions, _, _ = self.agents.act(states, timestep=timestep, timesteps=self.timesteps) # step the environments next_states, rewards, dones, infos = self.env.step(actions) @@ -220,7 +220,7 @@ def single_agent_eval(self) -> None: # compute actions with torch.no_grad(): - actions, _, _ = self.agents.act(states, inference=True, timestep=timestep, timesteps=self.timesteps) + actions, _, _ = self.agents.act(states, timestep=timestep, timesteps=self.timesteps) # step the environments next_states, rewards, dones, infos = self.env.step(actions) diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 938fdc80..d82fb26e 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -87,7 +87,7 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: # compute actions with torch.no_grad(): - actions, _, _ = self.agents.act(self.states, inference=True, timestep=timestep, timesteps=timesteps) + actions, _, _ = self.agents.act(self.states, timestep=timestep, 
timesteps=timesteps) else: # pre-interaction @@ -96,10 +96,7 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: # compute actions with torch.no_grad(): - actions = torch.vstack([agent.act(self.states[scope[0]:scope[1]], - inference=True, - timestep=timestep, - timesteps=timesteps)[0] \ + actions = torch.vstack([agent.act(self.states[scope[0]:scope[1]], timestep=timestep, timesteps=timesteps)[0] \ for agent, scope in zip(self.agents, self.agents_scope)]) # step the environments @@ -178,14 +175,11 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: with torch.no_grad(): if self.num_agents == 1: # compute actions - actions, _, _ = self.agents.act(self.states, inference=True, timestep=timestep, timesteps=timesteps) + actions, _, _ = self.agents.act(self.states, timestep=timestep, timesteps=timesteps) else: # compute actions - actions = torch.vstack([agent.act(self.states[scope[0]:scope[1]], - inference=True, - timestep=timestep, - timesteps=timesteps)[0] \ + actions = torch.vstack([agent.act(self.states[scope[0]:scope[1]], timestep=timestep, timesteps=timesteps)[0] \ for agent, scope in zip(self.agents, self.agents_scope)]) # step the environments diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index c5526223..c02957a7 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -57,10 +57,7 @@ def fn_processor(process_index, *args): elif task == "act": _states = queue.get()[scope[0]:scope[1]] with torch.no_grad(): - _actions = agent.act(_states, - inference=True, - timestep=msg['timestep'], - timesteps=msg['timesteps'])[0] + _actions = agent.act(_states, timestep=msg['timestep'], timesteps=msg['timesteps'])[0] if not _actions.is_cuda: _actions.share_memory_() queue.put(_actions) diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index 42114ad0..910b6079 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -77,10 +77,7 @@ def train(self) -> None: # compute actions with torch.no_grad(): - actions = torch.vstack([agent.act(states[scope[0]:scope[1]], - inference=True, - timestep=timestep, - timesteps=self.timesteps)[0] \ + actions = torch.vstack([agent.act(states[scope[0]:scope[1]], timestep=timestep, timesteps=self.timesteps)[0] \ for agent, scope in zip(self.agents, self.agents_scope)]) # step the environments @@ -138,10 +135,7 @@ def eval(self) -> None: # compute actions with torch.no_grad(): - actions = torch.vstack([agent.act(states[scope[0]:scope[1]], - inference=True, - timestep=timestep, - timesteps=self.timesteps)[0] \ + actions = torch.vstack([agent.act(states[scope[0]:scope[1]], timestep=timestep, timesteps=self.timesteps)[0] \ for agent, scope in zip(self.agents, self.agents_scope)]) # step the environments From daee3e9eba81c628cb4c719055f73093c0377365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 6 Sep 2022 13:58:48 +0200 Subject: [PATCH 052/108] Set models' store_separately configuration to False by default --- skrl/agents/torch/a2c/a2c.py | 2 +- skrl/agents/torch/amp/amp.py | 2 +- skrl/agents/torch/cem/cem.py | 2 +- skrl/agents/torch/ddpg/ddpg.py | 2 +- skrl/agents/torch/dqn/ddqn.py | 2 +- skrl/agents/torch/dqn/dqn.py | 2 +- skrl/agents/torch/ppo/ppo.py | 2 +- skrl/agents/torch/q_learning/q_learning.py | 2 +- skrl/agents/torch/sac/sac.py | 2 +- skrl/agents/torch/sarsa/sarsa.py | 2 +- skrl/agents/torch/td3/td3.py | 2 +- skrl/agents/torch/trpo/trpo.py | 2 +- 12 files changed, 
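A reduced sketch of the multi-agent action computation in the trainer code above: actions are computed under torch.no_grad() and stacked per agent scope. The fake_act helper, the tensor sizes, and the scope values are placeholders for real agents and environments, not library code.

import torch

states = torch.randn(6, 4)              # observations from 6 parallel environments
agents_scope = [(0, 2), (2, 6)]         # environment index ranges handled by each agent

def fake_act(s):
    # stand-in for agent.act(states, timestep=..., timesteps=...)[0]
    return torch.zeros(s.shape[0], 2)

with torch.no_grad():
    actions = torch.vstack([fake_act(states[start:stop]) for start, stop in agents_scope])
assert actions.shape == (6, 2)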
12 insertions(+), 12 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 64d407fa..0693e0d5 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -45,7 +45,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index e9b2264f..4af3af05 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -63,7 +63,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index fb655e65..6168ed02 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -36,7 +36,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 703dcd10..dc7235ab 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -45,7 +45,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index be0affb9..6ad0901e 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -47,7 +47,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index a3cb03b2..0a18ece2 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -47,7 +47,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 9581c362..2fa6081d 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -53,7 +53,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 95f4a9a5..f13a0c4a 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ 
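A toy illustration of the two checkpoint layouts governed by the store_separately flag: one file per registered module, or a single agent file keyed by module name. The modules and the /tmp directory are arbitrary examples; the name_tag / agent_tag naming mirrors the pattern in the diff above.

import torch
import torch.nn as nn

policy = nn.Linear(4, 2)
value = nn.Linear(4, 1)
optimizer = torch.optim.Adam(list(policy.parameters()) + list(value.parameters()), lr=1e-3)
checkpoint_modules = {"policy": policy, "value": value, "optimizer": optimizer}

store_separately = False        # the default set by the patch above
tag = "1000"                    # e.g. the current timestep

if store_separately:
    # one file per module: policy_1000.pt, value_1000.pt, optimizer_1000.pt
    for name, module in checkpoint_modules.items():
        torch.save(module.state_dict(), "/tmp/{}_{}.pt".format(name, tag))
else:
    # a single file holding every module's state, keyed by name
    torch.save({name: module.state_dict() for name, module in checkpoint_modules.items()},
               "/tmp/agent_{}.pt".format(tag))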
b/skrl/agents/torch/q_learning/q_learning.py @@ -27,7 +27,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 886c6f25..26145f59 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -45,7 +45,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index 9871fe8c..fdad6030 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -27,7 +27,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index c3c66442..ae62cbf9 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -50,7 +50,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index b06c84fe..e9766f8c 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -53,7 +53,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "store_separately": True, # whether to store checkpoints separately + "store_separately": False, # whether to store checkpoints separately } } From f8fe773fab70d4aa059fa2d890526ee29a310fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 6 Sep 2022 14:06:53 +0200 Subject: [PATCH 053/108] Save and load agents' modules --- skrl/agents/torch/base.py | 57 ++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index f7c9ea07..616a05b5 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -10,6 +10,7 @@ import torch from torch.utils.tensorboard import SummaryWriter +from skrl import logger from ...memories.torch import Memory from ...models.torch import Model @@ -87,7 +88,7 @@ def __str__(self) -> str: string += "\n |-- {}: {}".format(k, v) return string - def _empty_preprocessor(self, _input, *args, **kwargs) -> Any: + def _empty_preprocessor(self, _input: Any, *args, **kwargs) -> Any: """Empty preprocess method This method is defined because PyTorch multiprocessing can't pickle lambdas @@ -100,6 +101,17 @@ def _empty_preprocessor(self, _input, *args, **kwargs) -> Any: """ return _input + def _get_internal_value(self, _module: Any) -> Any: + """Get internal module/variable state/value + + :param _input: Module or variable + :type _input: Any + + :return: Module/variable 
state/value + :rtype: Any + """ + return _module.state_dict() if hasattr(_module, "state_dict") else _module + def init(self) -> None: """Initialize the agent @@ -154,7 +166,7 @@ def write_tracking_data(self, timestep: int, timesteps: int) -> None: self.tracking_data.clear() def write_checkpoint(self, timestep: int, timesteps: int) -> None: - """Write checkpoint (models) to disk + """Write checkpoint (modules) to disk The checkpoints are saved in the directory 'checkpoints' in the experiment directory. The name of the checkpoint is the current timestep if timestep is not None, otherwise it is the current time. @@ -168,15 +180,15 @@ def write_checkpoint(self, timestep: int, timesteps: int) -> None: # separated modules if self.checkpoint_store_separately: for name, module in self.checkpoint_modules.items(): - torch.save(module.state_dict(), os.path.join(self.experiment_dir, "checkpoints", "{}_{}.pt".format(name, tag))) + torch.save(self._get_internal_value(module), os.path.join(self.experiment_dir, "checkpoints", "{}_{}.pt".format(name, tag))) # whole agent else: modules = {} for name, module in self.checkpoint_modules.items(): - modules[name] = module.state_dict() + modules[name] = self._get_internal_value(module) torch.save(modules, os.path.join(self.experiment_dir, "checkpoints", "{}_{}.pt".format("agent", tag))) - # best models + # best modules if self.checkpoint_best_modules["modules"] and not self.checkpoint_best_modules["saved"]: # separated modules if self.checkpoint_store_separately: @@ -290,6 +302,39 @@ def set_mode(self, mode: str) -> None: if model is not None: model.set_mode(mode) + def save(self, path: str) -> None: + """Save the agent to the specified path + + :param path: Path to save the model to + :type path: str + """ + modules = {} + for name, module in self.checkpoint_modules.items(): + modules[name] = self._get_internal_value(module) + torch.save(modules, path) + + def load(self, path: str) -> None: + """Load the model from the specified path + + The final storage device is determined by the constructor of the model + + :param path: Path to load the model from + :type path: str + """ + modules = torch.load(path, map_location=self.device) + if type(modules) is dict: + for name, data in modules.items(): + module = self.checkpoint_modules.get(name, None) + if module is not None: + if hasattr(module, "load_state_dict"): + module.load_state_dict(data) + if hasattr(module, "eval"): + module.eval() + else: + raise NotImplementedError + else: + logger.warning("Cannot load the {} module. 
The agent doesn't have such an instance".format(name)) + def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -318,7 +363,7 @@ def post_interaction(self, timestep: int, timesteps: int) -> None: self.checkpoint_best_modules["timestep"] = timestep self.checkpoint_best_modules["reward"] = reward self.checkpoint_best_modules["saved"] = False - self.checkpoint_best_modules["modules"] = {k: copy.deepcopy(v.state_dict()) for k, v in self.checkpoint_modules.items()} + self.checkpoint_best_modules["modules"] = {k: copy.deepcopy(self._get_internal_value(v)) for k, v in self.checkpoint_modules.items()} # write to tensorboard self.write_tracking_data(timestep, timesteps) From fb25540635aa9214a66c5ede344d271c0be53e50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 6 Sep 2022 14:32:37 +0200 Subject: [PATCH 054/108] Update OpenAI Gym documentation domain link --- README.md | 2 +- docs/source/index.rst | 2 +- docs/source/intro/installation.rst | 2 +- docs/source/modules/skrl.agents.a2c.rst | 2 +- docs/source/modules/skrl.agents.amp.rst | 2 +- docs/source/modules/skrl.agents.cem.rst | 2 +- docs/source/modules/skrl.agents.ddpg.rst | 2 +- docs/source/modules/skrl.agents.ddqn.rst | 2 +- docs/source/modules/skrl.agents.dqn.rst | 2 +- docs/source/modules/skrl.agents.ppo.rst | 2 +- docs/source/modules/skrl.agents.q_learning.rst | 2 +- docs/source/modules/skrl.agents.sac.rst | 2 +- docs/source/modules/skrl.agents.sarsa.rst | 2 +- docs/source/modules/skrl.agents.td3.rst | 2 +- docs/source/modules/skrl.agents.trpo.rst | 2 +- docs/source/modules/skrl.envs.wrapping.rst | 4 ++-- 16 files changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 2f3b3ecd..f9fa13a8 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@
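A small end-to-end sketch of the agent-level save/load introduced above: each registered entry is stored through its state_dict() when available (or kept as a raw value otherwise) and fed back on load, switching restored modules to evaluation mode. The toy module, the "scale" entry, and the temporary file path are assumptions for illustration only.

import torch
import torch.nn as nn

policy = nn.Linear(4, 2)
checkpoint_modules = {"policy": policy, "scale": 1.0}   # "scale" is a hypothetical plain value

def get_internal_value(module):
    # modules and optimizers expose state_dict(); plain values are stored as-is
    return module.state_dict() if hasattr(module, "state_dict") else module

# save: one dict with every registered entry
torch.save({k: get_internal_value(v) for k, v in checkpoint_modules.items()}, "/tmp/agent.pt")

# load: feed each entry back into its counterpart
data = torch.load("/tmp/agent.pt")
for name, value in data.items():
    module = checkpoint_modules.get(name)
    if hasattr(module, "load_state_dict"):
        module.load_state_dict(value)
        if hasattr(module, "eval"):
            module.eval()
    else:
        checkpoint_modules[name] = value    # plain values are simply replaced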

SKRL - Reinforcement Learning library


-**skrl** is an open-source modular library for Reinforcement Learning written in Python (using [PyTorch](https://pytorch.org/)) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the [OpenAI Gym](https://www.gymlibrary.ml) and [DeepMind](https://github.com/deepmind/dm_env) environment interfaces, it allows loading and configuring [NVIDIA Isaac Gym](https://developer.nvidia.com/isaac-gym/) and [NVIDIA Omniverse Isaac Gym](https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/tutorial_gym_isaac_gym.html) environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run +**skrl** is an open-source modular library for Reinforcement Learning written in Python (using [PyTorch](https://pytorch.org/)) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the [OpenAI Gym](https://www.gymlibrary.dev) and [DeepMind](https://github.com/deepmind/dm_env) environment interfaces, it allows loading and configuring [NVIDIA Isaac Gym](https://developer.nvidia.com/isaac-gym/) and [NVIDIA Omniverse Isaac Gym](https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/tutorial_gym_isaac_gym.html) environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run
diff --git a/docs/source/index.rst b/docs/source/index.rst index f9c96941..0569fb7c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,7 +1,7 @@ SKRL - Reinforcement Learning library (|version|) ================================================= -**skrl** is an open-source modular library for Reinforcement Learning written in Python (using `PyTorch `_) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the `OpenAI Gym `_ and `DeepMind `_ environment interfaces, it allows loading and configuring `NVIDIA Isaac Gym `_ and `NVIDIA Omniverse Isaac Gym `_ environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run +**skrl** is an open-source modular library for Reinforcement Learning written in Python (using `PyTorch `_) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the `OpenAI Gym `_ and `DeepMind `_ environment interfaces, it allows loading and configuring `NVIDIA Isaac Gym `_ and `NVIDIA Omniverse Isaac Gym `_ environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run **Main features:** * Clean code diff --git a/docs/source/intro/installation.rst b/docs/source/intro/installation.rst index 3c96b006..eaf11187 100644 --- a/docs/source/intro/installation.rst +++ b/docs/source/intro/installation.rst @@ -10,7 +10,7 @@ Prerequisites **skrl** requires Python 3.6 or higher and the following libraries (they will be installed automatically): - * `gym `_ + * `gym `_ * `tqdm `_ * `torch `_ 1.8.0 or higher * `tensorboard `_ diff --git a/docs/source/modules/skrl.agents.a2c.rst b/docs/source/modules/skrl.agents.a2c.rst index 7413838e..f2d52e46 100644 --- a/docs/source/modules/skrl.agents.a2c.rst +++ b/docs/source/modules/skrl.agents.a2c.rst @@ -82,7 +82,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.amp.rst b/docs/source/modules/skrl.agents.amp.rst index 314ecf6a..4ee3db5b 100644 --- a/docs/source/modules/skrl.agents.amp.rst +++ b/docs/source/modules/skrl.agents.amp.rst @@ -103,7 +103,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.cem.rst b/docs/source/modules/skrl.agents.cem.rst index 0001678b..6ef63e4c 100644 --- a/docs/source/modules/skrl.agents.cem.rst +++ b/docs/source/modules/skrl.agents.cem.rst @@ -38,7 +38,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. 
list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.ddpg.rst b/docs/source/modules/skrl.agents.ddpg.rst index 51dfc657..ae57fc80 100644 --- a/docs/source/modules/skrl.agents.ddpg.rst +++ b/docs/source/modules/skrl.agents.ddpg.rst @@ -66,7 +66,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.ddqn.rst b/docs/source/modules/skrl.agents.ddqn.rst index afeddc73..f12ac7c1 100644 --- a/docs/source/modules/skrl.agents.ddqn.rst +++ b/docs/source/modules/skrl.agents.ddqn.rst @@ -45,7 +45,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.dqn.rst b/docs/source/modules/skrl.agents.dqn.rst index 77eb0e89..0ed111f8 100644 --- a/docs/source/modules/skrl.agents.dqn.rst +++ b/docs/source/modules/skrl.agents.dqn.rst @@ -45,7 +45,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.ppo.rst b/docs/source/modules/skrl.agents.ppo.rst index 5260bddc..8be4f1f1 100644 --- a/docs/source/modules/skrl.agents.ppo.rst +++ b/docs/source/modules/skrl.agents.ppo.rst @@ -98,7 +98,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.q_learning.rst b/docs/source/modules/skrl.agents.q_learning.rst index b5dee107..9b270188 100644 --- a/docs/source/modules/skrl.agents.q_learning.rst +++ b/docs/source/modules/skrl.agents.q_learning.rst @@ -36,7 +36,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.sac.rst b/docs/source/modules/skrl.agents.sac.rst index 599c9ff0..20dbe252 100644 --- a/docs/source/modules/skrl.agents.sac.rst +++ b/docs/source/modules/skrl.agents.sac.rst @@ -73,7 +73,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.sarsa.rst b/docs/source/modules/skrl.agents.sarsa.rst index 1c5fba3f..dacfb320 100644 --- a/docs/source/modules/skrl.agents.sarsa.rst +++ b/docs/source/modules/skrl.agents.sarsa.rst @@ -35,7 +35,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. 
list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.td3.rst b/docs/source/modules/skrl.agents.td3.rst index 2b8108fb..74bb068e 100644 --- a/docs/source/modules/skrl.agents.td3.rst +++ b/docs/source/modules/skrl.agents.td3.rst @@ -76,7 +76,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.agents.trpo.rst b/docs/source/modules/skrl.agents.trpo.rst index d4e87b78..52da1555 100644 --- a/docs/source/modules/skrl.agents.trpo.rst +++ b/docs/source/modules/skrl.agents.trpo.rst @@ -136,7 +136,7 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ .. list-table:: :header-rows: 1 diff --git a/docs/source/modules/skrl.envs.wrapping.rst b/docs/source/modules/skrl.envs.wrapping.rst index f7489a94..3f0a7be5 100644 --- a/docs/source/modules/skrl.envs.wrapping.rst +++ b/docs/source/modules/skrl.envs.wrapping.rst @@ -3,7 +3,7 @@ Wrapping This library works with a common API to interact with the following RL environments: -* `OpenAI Gym `_ (single and vectorized environments) +* `OpenAI Gym `_ (single and vectorized environments) * `DeepMind `_ * `NVIDIA Isaac Gym `_ (preview 2, 3 and 4) * `NVIDIA Omniverse Isaac Gym `_ @@ -150,7 +150,7 @@ Basic usage .. tab:: Vectorized environment - Visit the OpenAI Gym documentation (`Vector API `_) for more information about the creation and usage of vectorized environments + Visit the OpenAI Gym documentation (`Vector API `_) for more information about the creation and usage of vectorized environments .. 
code-block:: python :linenos: From cf92e72121d6714841c70cb0989f72d7166988c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 6 Sep 2022 23:32:46 +0200 Subject: [PATCH 055/108] Modify logging format --- skrl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skrl/__init__.py b/skrl/__init__.py index 37fdd2aa..d6928d79 100644 --- a/skrl/__init__.py +++ b/skrl/__init__.py @@ -12,7 +12,7 @@ # logger with format class _Formatter(logging.Formatter): - _format = "%(name)s:%(levelname)s - %(message)s (%(module)s:%(funcName)s:%(lineno)d)" + _format = "[%(name)s:%(levelname)s] %(message)s" _formats = {logging.DEBUG: f"\x1b[38;20m{_format}\x1b[0m", logging.INFO: f"\x1b[38;20m{_format}\x1b[0m", logging.WARNING: f"\x1b[33;20m{_format}\x1b[0m", From 496fe7784ba7029fa82df664fe8779ea4b02475a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 6 Sep 2022 23:36:25 +0200 Subject: [PATCH 056/108] Migrate from external models by specifying its path --- skrl/models/torch/base.py | 165 +++++++++++++++++++++++++++++++++----- 1 file changed, 143 insertions(+), 22 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 93b98074..9973cc4d 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -357,7 +357,7 @@ def act(self, Deterministic agents must ignore the last two components and return empty tensors or None for them :rtype: sequence of torch.Tensor """ - logger.warn("Make sure to place Mixins before Model during model definition") + logger.warning("Make sure to place Mixins before Model during model definition") raise NotImplementedError("The action to be taken by the agent (.act()) is not implemented") def set_mode(self, mode: str) -> None: @@ -394,7 +394,6 @@ def save(self, path: str, state_dict: Optional[dict] = None) -> None: >>> old_state_dict = copy.deepcopy(model.state_dict()) >>> # ... >>> model.save("/tmp/model.pt", old_state_dict) - """ torch.save(self.state_dict() if state_dict is None else state_dict, path) @@ -420,37 +419,143 @@ def load(self, path: str) -> None: self.eval() def migrate(self, - state_dict: Mapping[str, torch.Tensor], + state_dict: Optional[Mapping[str, torch.Tensor]] = None, + path: Optional[str] = None, name_map: Mapping[str, str] = {}, auto_mapping: bool = True, - show_names: bool = False) -> bool: + verbose: bool = False) -> bool: """Migrate the specified external model's state dict to the current model - :param state_dict: External model's state dict to migrate from - :type state_dict: Mapping[str, torch.Tensor] + The final storage device is determined by the constructor of the model + + Only one of ``state_dict`` or ``path`` can be specified. + The ``path`` parameter allows automatically loading the ``state_dict`` only from files generated + by the *rl_games* and *stable-baselines3* libraries at the moment + + For ambiguous models (where 2 or more parameters, for source or current model, have equal shape) + it is necessary to define the ``name_map``, at least for those parameters, to perform the migration successfully + + :param state_dict: External model's state dict to migrate from (default: ``None``) + :type state_dict: Mapping[str, torch.Tensor], optional + :param path: Path to the external checkpoint to migrate from (default: ``None``) + :type path: str, optional :param name_map: Name map to use for the migration (default: ``{}``). 
Keys are the current parameter names and values are the external parameter names :type name_map: Mapping[str, str], optional :param auto_mapping: Automatically map the external state dict to the current state dict (default: ``True``) :type auto_mapping: bool, optional - :param show_names: Show the names of both, current and external state dicts parameters (default: ``False``) - :type show_names: bool, optional + :param verbose: Show model names and migration (default: ``False``) + :type verbose: bool, optional + + :raises ValueError: If neither or both of ``state_dict`` and ``path`` parameters have been set + :raises ValueError: If the correct file type cannot be identified from the ``path`` parameter :return: True if the migration was successful, False otherwise. Migration is successful if all parameters of the current model are found in the external model :rtype: bool + + Example:: + + # migrate a rl_games checkpoint with unambiguous state_dict + >>> model.migrate(path="./runs/Ant/nn/Ant.pth") + True + + # migrate a rl_games checkpoint with ambiguous state_dict + >>> model.migrate(path="./runs/Cartpole/nn/Cartpole.pth", verbose=False) + [skrl:WARNING] Ambiguous match for log_std_parameter <- [value_mean_std.running_mean, value_mean_std.running_var, a2c_network.sigma] + [skrl:WARNING] Ambiguous match for net.0.bias <- [a2c_network.actor_mlp.0.bias, a2c_network.actor_mlp.2.bias] + [skrl:WARNING] Ambiguous match for net.2.bias <- [a2c_network.actor_mlp.0.bias, a2c_network.actor_mlp.2.bias] + [skrl:WARNING] Ambiguous match for net.4.weight <- [a2c_network.value.weight, a2c_network.mu.weight] + [skrl:WARNING] Ambiguous match for net.4.bias <- [a2c_network.value.bias, a2c_network.mu.bias] + [skrl:WARNING] Multiple use of a2c_network.actor_mlp.0.bias -> [net.0.bias, net.2.bias] + [skrl:WARNING] Multiple use of a2c_network.actor_mlp.2.bias -> [net.0.bias, net.2.bias] + False + >>> name_map = {"log_std_parameter": "a2c_network.sigma", + ... "net.0.bias": "a2c_network.actor_mlp.0.bias", + ... "net.2.bias": "a2c_network.actor_mlp.2.bias", + ... "net.4.weight": "a2c_network.mu.weight", + ... 
"net.4.bias": "a2c_network.mu.bias"} + >>> model.migrate(path="./runs/Cartpole/nn/Cartpole.pth", name_map=name_map, verbose=True) + [skrl:INFO] Models + [skrl:INFO] |-- current: 7 items + [skrl:INFO] | |-- log_std_parameter : torch.Size([1]) + [skrl:INFO] | |-- net.0.weight : torch.Size([32, 4]) + [skrl:INFO] | |-- net.0.bias : torch.Size([32]) + [skrl:INFO] | |-- net.2.weight : torch.Size([32, 32]) + [skrl:INFO] | |-- net.2.bias : torch.Size([32]) + [skrl:INFO] | |-- net.4.weight : torch.Size([1, 32]) + [skrl:INFO] | |-- net.4.bias : torch.Size([1]) + [skrl:INFO] |-- source: 15 items + [skrl:INFO] | |-- value_mean_std.running_mean : torch.Size([1]) + [skrl:INFO] | |-- value_mean_std.running_var : torch.Size([1]) + [skrl:INFO] | |-- value_mean_std.count : torch.Size([]) + [skrl:INFO] | |-- running_mean_std.running_mean : torch.Size([4]) + [skrl:INFO] | |-- running_mean_std.running_var : torch.Size([4]) + [skrl:INFO] | |-- running_mean_std.count : torch.Size([]) + [skrl:INFO] | |-- a2c_network.sigma : torch.Size([1]) + [skrl:INFO] | |-- a2c_network.actor_mlp.0.weight : torch.Size([32, 4]) + [skrl:INFO] | |-- a2c_network.actor_mlp.0.bias : torch.Size([32]) + [skrl:INFO] | |-- a2c_network.actor_mlp.2.weight : torch.Size([32, 32]) + [skrl:INFO] | |-- a2c_network.actor_mlp.2.bias : torch.Size([32]) + [skrl:INFO] | |-- a2c_network.value.weight : torch.Size([1, 32]) + [skrl:INFO] | |-- a2c_network.value.bias : torch.Size([1]) + [skrl:INFO] | |-- a2c_network.mu.weight : torch.Size([1, 32]) + [skrl:INFO] | |-- a2c_network.mu.bias : torch.Size([1]) + [skrl:INFO] Migration + [skrl:INFO] |-- map: log_std_parameter <- a2c_network.sigma + [skrl:INFO] |-- auto: net.0.weight <- a2c_network.actor_mlp.0.weight + [skrl:INFO] |-- map: net.0.bias <- a2c_network.actor_mlp.0.bias + [skrl:INFO] |-- auto: net.2.weight <- a2c_network.actor_mlp.2.weight + [skrl:INFO] |-- map: net.2.bias <- a2c_network.actor_mlp.2.bias + [skrl:INFO] |-- map: net.4.weight <- a2c_network.mu.weight + [skrl:INFO] |-- map: net.4.bias <- a2c_network.mu.bias + False + + # migrate a stable-baselines3 checkpoint with unambiguous state_dict + >>> model.migrate(path="./ddpg_pendulum.zip") + True + + # migrate from any exported model by loading its state_dict (unambiguous state_dict) + >>> state_dict = torch.load("./external_model.pt") + >>> model.migrate(state_dict=state_dict) + True """ - # Show state_dict - if show_names: - print("Model migration") - print("Current state_dict:") + if (state_dict is not None) + (path is not None) != 1: + raise ValueError("Exactly one of state_dict or path may be specified") + + # load state_dict from path + if path is not None: + state_dict = {} + # rl_games checkpoint + if path.endswith(".pt") or path.endswith(".pth"): + checkpoint = torch.load(path, map_location=self.device) + if type(checkpoint) is dict: + state_dict = checkpoint.get("model", {}) + # stable-baselines3 + elif path.endswith(".zip"): + import zipfile + try: + archive = zipfile.ZipFile(path, 'r') + with archive.open('policy.pth', mode="r") as file: + state_dict = torch.load(file, map_location=self.device) + except KeyError as e: + logger.warning(str(e)) + state_dict = {} + else: + raise ValueError("Cannot identify file type") + + # show state_dict + if verbose: + logger.info("Models") + logger.info(" |-- current: {} items".format(len(self.state_dict().keys()))) for name, tensor in self.state_dict().items(): - print(" |-- {} : {}".format(name, tensor.shape)) - print("Source state_dict:") + logger.info(" | |-- {} : {}".format(name, tensor.shape)) 
+ logger.info(" |-- source: {} items".format(len(state_dict.keys()))) for name, tensor in state_dict.items(): - print(" |-- {} : {}".format(name, tensor.shape)) + logger.info(" | |-- {} : {}".format(name, tensor.shape)) + logger.info("Migration") - # migrate the state dict to current model + # migrate the state_dict to current model new_state_dict = collections.OrderedDict() match_counter = collections.defaultdict(list) used_counter = collections.defaultdict(list) @@ -462,43 +567,51 @@ def migrate(self, new_state_dict[name] = external_tensor match_counter[name].append(external_name) used_counter[external_name].append(name) + if verbose: + logger.info(" |-- map: {} <- {}".format(name, external_name)) break else: - print("Shape mismatch for {} <- {} : {} != {}".format(name, external_name, tensor.shape, external_tensor.shape)) + logger.warning("Shape mismatch for {} <- {} : {} != {}".format(name, external_name, tensor.shape, external_tensor.shape)) # auto-mapped names - if auto_mapping: + if auto_mapping and name not in name_map: if tensor.shape == external_tensor.shape: if name.endswith(".weight"): if external_name.endswith(".weight"): new_state_dict[name] = external_tensor match_counter[name].append(external_name) used_counter[external_name].append(name) + if verbose: + logger.info(" |-- auto: {} <- {}".format(name, external_name)) elif name.endswith(".bias"): if external_name.endswith(".bias"): new_state_dict[name] = external_tensor match_counter[name].append(external_name) used_counter[external_name].append(name) + if verbose: + logger.info(" |-- auto: {} <- {}".format(name, external_name)) else: if not external_name.endswith(".weight") and not external_name.endswith(".bias"): new_state_dict[name] = external_tensor match_counter[name].append(external_name) used_counter[external_name].append(name) + if verbose: + logger.info(" |-- auto: {} <- {}".format(name, external_name)) # show ambiguous matches status = True for name, tensor in self.state_dict().items(): if len(match_counter.get(name, [])) > 1: - print("Ambiguous match for {} <- {}".format(name, match_counter.get(name, []))) + logger.warning("Ambiguous match for {} <- [{}]".format(name, ", ".join(match_counter.get(name, [])))) status = False # show missing matches for name, tensor in self.state_dict().items(): if not match_counter.get(name, []): - print("Missing match for {}".format(name)) + logger.warning("Missing match for {}".format(name)) status = False - # show duplicated uses + # show multiple uses for name, tensor in state_dict.items(): if len(used_counter.get(name, [])) > 1: - print("Duplicated use of {} -> {}".format(name, used_counter.get(name, []))) + logger.warning("Multiple use of {} -> [{}]".format(name, ", ".join(used_counter.get(name, [])))) status = False # load new state dict @@ -515,6 +628,14 @@ def freeze_parameters(self, freeze: bool = True) -> None: :param freeze: Freeze the internal parameters if True, otherwise unfreeze them (default: ``True``) :type freeze: bool, optional + + Example:: + + # freeze model parameters + >>> model.freeze_parameters(True) + + # unfreeze model parameters + >>> model.freeze_parameters(False) """ for parameters in self.parameters(): parameters.requires_grad = not freeze From b89036a7491af904ceaf2e8bf7aaf9e14fae615f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 7 Sep 2022 18:38:06 +0200 Subject: [PATCH 057/108] Allow migrating extrenal state_dict/checkpoint --- skrl/agents/torch/base.py | 240 +++++++++++++++++++++++++++++++++++++- 
 skrl/models/torch/base.py | 4 +- 2 files changed, 241 insertions(+), 3 deletions(-) diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 616a05b5..3483a994 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Mapping, Tuple, Dict, Any import os import gym @@ -335,6 +335,244 @@ def load(self, path: str) -> None: else: logger.warning("Cannot load the {} module. The agent doesn't have such an instance".format(name)) + def migrate(self, + path: str, + name_map: Mapping[str, Mapping[str, str]] = {}, + auto_mapping: bool = True, + verbose: bool = False) -> bool: + """Migrate the specified external checkpoint to the current agent + + The final storage device is determined by the constructor of the agent. + Only files generated by the *rl_games* library are supported at the moment + + For ambiguous models (where 2 or more parameters, for source or current model, have equal shape) + it is necessary to define the ``name_map``, at least for those parameters, to perform the migration successfully + + :param path: Path to the external checkpoint to migrate from + :type path: str + :param name_map: Name map to use for the migration (default: ``{}``). + Keys are the current parameter names and values are the external parameter names + :type name_map: Mapping[str, Mapping[str, str]], optional + :param auto_mapping: Automatically map the external state dict to the current state dict (default: ``True``) + :type auto_mapping: bool, optional + :param verbose: Show model names and migration (default: ``False``) + :type verbose: bool, optional + + :raises ValueError: If the correct file type cannot be identified from the ``path`` parameter + + :return: True if the migration was successful, False otherwise. + Migration is successful if all parameters of the current model are found in the external model + :rtype: bool + + Example:: + + # migrate a rl_games checkpoint with ambiguous state_dict + >>> agent.migrate(path="./runs/Cartpole/nn/Cartpole.pth", verbose=False) + [skrl:WARNING] Ambiguous match for net.0.bias <- [a2c_network.actor_mlp.0.bias, a2c_network.actor_mlp.2.bias] + [skrl:WARNING] Ambiguous match for net.2.bias <- [a2c_network.actor_mlp.0.bias, a2c_network.actor_mlp.2.bias] + [skrl:WARNING] Ambiguous match for net.4.weight <- [a2c_network.value.weight, a2c_network.mu.weight] + [skrl:WARNING] Ambiguous match for net.4.bias <- [a2c_network.value.bias, a2c_network.mu.bias] + [skrl:WARNING] Multiple use of a2c_network.actor_mlp.0.bias -> [net.0.bias, net.2.bias] + [skrl:WARNING] Multiple use of a2c_network.actor_mlp.2.bias -> [net.0.bias, net.2.bias] + [skrl:WARNING] Ambiguous match for net.0.bias <- [a2c_network.actor_mlp.0.bias, a2c_network.actor_mlp.2.bias] + [skrl:WARNING] Ambiguous match for net.2.bias <- [a2c_network.actor_mlp.0.bias, a2c_network.actor_mlp.2.bias] + [skrl:WARNING] Ambiguous match for net.4.weight <- [a2c_network.value.weight, a2c_network.mu.weight] + [skrl:WARNING] Ambiguous match for net.4.bias <- [a2c_network.value.bias, a2c_network.mu.bias] + [skrl:WARNING] Multiple use of a2c_network.actor_mlp.0.bias -> [net.0.bias, net.2.bias] + [skrl:WARNING] Multiple use of a2c_network.actor_mlp.2.bias -> [net.0.bias, net.2.bias] + False + >>> name_map = {"policy": {"net.0.bias": "a2c_network.actor_mlp.0.bias", + ... "net.2.bias": "a2c_network.actor_mlp.2.bias", + ... "net.4.weight": "a2c_network.mu.weight", + ... "net.4.bias": "a2c_network.mu.bias"}, + ... 
"value": {"net.0.bias": "a2c_network.actor_mlp.0.bias", + ... "net.2.bias": "a2c_network.actor_mlp.2.bias", + ... "net.4.weight": "a2c_network.value.weight", + ... "net.4.bias": "a2c_network.value.bias"}} + >>> model.migrate(path="./runs/Cartpole/nn/Cartpole.pth", name_map=name_map, verbose=True) + [skrl:INFO] Modules + [skrl:INFO] |-- current + [skrl:INFO] | |-- policy (Policy) + [skrl:INFO] | | |-- log_std_parameter : [1] + [skrl:INFO] | | |-- net.0.weight : [32, 4] + [skrl:INFO] | | |-- net.0.bias : [32] + [skrl:INFO] | | |-- net.2.weight : [32, 32] + [skrl:INFO] | | |-- net.2.bias : [32] + [skrl:INFO] | | |-- net.4.weight : [1, 32] + [skrl:INFO] | | |-- net.4.bias : [1] + [skrl:INFO] | |-- value (Value) + [skrl:INFO] | | |-- net.0.weight : [32, 4] + [skrl:INFO] | | |-- net.0.bias : [32] + [skrl:INFO] | | |-- net.2.weight : [32, 32] + [skrl:INFO] | | |-- net.2.bias : [32] + [skrl:INFO] | | |-- net.4.weight : [1, 32] + [skrl:INFO] | | |-- net.4.bias : [1] + [skrl:INFO] | |-- optimizer (Adam) + [skrl:INFO] | | |-- state (dict) + [skrl:INFO] | | |-- param_groups (list) + [skrl:INFO] | |-- state_preprocessor (RunningStandardScaler) + [skrl:INFO] | | |-- running_mean : [4] + [skrl:INFO] | | |-- running_variance : [4] + [skrl:INFO] | | |-- current_count : [] + [skrl:INFO] | |-- value_preprocessor (RunningStandardScaler) + [skrl:INFO] | | |-- running_mean : [1] + [skrl:INFO] | | |-- running_variance : [1] + [skrl:INFO] | | |-- current_count : [] + [skrl:INFO] |-- source + [skrl:INFO] | |-- model (OrderedDict) + [skrl:INFO] | | |-- value_mean_std.running_mean : [1] + [skrl:INFO] | | |-- value_mean_std.running_var : [1] + [skrl:INFO] | | |-- value_mean_std.count : [] + [skrl:INFO] | | |-- running_mean_std.running_mean : [4] + [skrl:INFO] | | |-- running_mean_std.running_var : [4] + [skrl:INFO] | | |-- running_mean_std.count : [] + [skrl:INFO] | | |-- a2c_network.sigma : [1] + [skrl:INFO] | | |-- a2c_network.actor_mlp.0.weight : [32, 4] + [skrl:INFO] | | |-- a2c_network.actor_mlp.0.bias : [32] + [skrl:INFO] | | |-- a2c_network.actor_mlp.2.weight : [32, 32] + [skrl:INFO] | | |-- a2c_network.actor_mlp.2.bias : [32] + [skrl:INFO] | | |-- a2c_network.value.weight : [1, 32] + [skrl:INFO] | | |-- a2c_network.value.bias : [1] + [skrl:INFO] | | |-- a2c_network.mu.weight : [1, 32] + [skrl:INFO] | | |-- a2c_network.mu.bias : [1] + [skrl:INFO] | |-- epoch (int) + [skrl:INFO] | |-- optimizer (dict) + [skrl:INFO] | |-- frame (int) + [skrl:INFO] | |-- last_mean_rewards (float32) + [skrl:INFO] | |-- env_state (NoneType) + [skrl:INFO] Migration + [skrl:INFO] Model: policy (Policy) + [skrl:INFO] Models + [skrl:INFO] |-- current: 7 items + [skrl:INFO] | |-- log_std_parameter : [1] + [skrl:INFO] | |-- net.0.weight : [32, 4] + [skrl:INFO] | |-- net.0.bias : [32] + [skrl:INFO] | |-- net.2.weight : [32, 32] + [skrl:INFO] | |-- net.2.bias : [32] + [skrl:INFO] | |-- net.4.weight : [1, 32] + [skrl:INFO] | |-- net.4.bias : [1] + [skrl:INFO] |-- source: 9 items + [skrl:INFO] | |-- a2c_network.sigma : [1] + [skrl:INFO] | |-- a2c_network.actor_mlp.0.weight : [32, 4] + [skrl:INFO] | |-- a2c_network.actor_mlp.0.bias : [32] + [skrl:INFO] | |-- a2c_network.actor_mlp.2.weight : [32, 32] + [skrl:INFO] | |-- a2c_network.actor_mlp.2.bias : [32] + [skrl:INFO] | |-- a2c_network.value.weight : [1, 32] + [skrl:INFO] | |-- a2c_network.value.bias : [1] + [skrl:INFO] | |-- a2c_network.mu.weight : [1, 32] + [skrl:INFO] | |-- a2c_network.mu.bias : [1] + [skrl:INFO] Migration + [skrl:INFO] |-- auto: log_std_parameter <- a2c_network.sigma + 
[skrl:INFO] |-- auto: net.0.weight <- a2c_network.actor_mlp.0.weight + [skrl:INFO] |-- map: net.0.bias <- a2c_network.actor_mlp.0.bias + [skrl:INFO] |-- auto: net.2.weight <- a2c_network.actor_mlp.2.weight + [skrl:INFO] |-- map: net.2.bias <- a2c_network.actor_mlp.2.bias + [skrl:INFO] |-- map: net.4.weight <- a2c_network.mu.weight + [skrl:INFO] |-- map: net.4.bias <- a2c_network.mu.bias + [skrl:INFO] Model: value (Value) + [skrl:INFO] Models + [skrl:INFO] |-- current: 6 items + [skrl:INFO] | |-- net.0.weight : [32, 4] + [skrl:INFO] | |-- net.0.bias : [32] + [skrl:INFO] | |-- net.2.weight : [32, 32] + [skrl:INFO] | |-- net.2.bias : [32] + [skrl:INFO] | |-- net.4.weight : [1, 32] + [skrl:INFO] | |-- net.4.bias : [1] + [skrl:INFO] |-- source: 9 items + [skrl:INFO] | |-- a2c_network.sigma : [1] + [skrl:INFO] | |-- a2c_network.actor_mlp.0.weight : [32, 4] + [skrl:INFO] | |-- a2c_network.actor_mlp.0.bias : [32] + [skrl:INFO] | |-- a2c_network.actor_mlp.2.weight : [32, 32] + [skrl:INFO] | |-- a2c_network.actor_mlp.2.bias : [32] + [skrl:INFO] | |-- a2c_network.value.weight : [1, 32] + [skrl:INFO] | |-- a2c_network.value.bias : [1] + [skrl:INFO] | |-- a2c_network.mu.weight : [1, 32] + [skrl:INFO] | |-- a2c_network.mu.bias : [1] + [skrl:INFO] Migration + [skrl:INFO] |-- auto: net.0.weight <- a2c_network.actor_mlp.0.weight + [skrl:INFO] |-- map: net.0.bias <- a2c_network.actor_mlp.0.bias + [skrl:INFO] |-- auto: net.2.weight <- a2c_network.actor_mlp.2.weight + [skrl:INFO] |-- map: net.2.bias <- a2c_network.actor_mlp.2.bias + [skrl:INFO] |-- map: net.4.weight <- a2c_network.value.weight + [skrl:INFO] |-- map: net.4.bias <- a2c_network.value.bias + True + """ + # load state_dict from path + if path is not None: + # rl_games checkpoint + if path.endswith(".pt") or path.endswith(".pth"): + checkpoint = torch.load(path, map_location=self.device) + else: + raise ValueError("Cannot identify file type") + + # show modules + if verbose: + logger.info("Modules") + logger.info(" |-- current") + for name, module in self.checkpoint_modules.items(): + logger.info(" | |-- {} ({})".format(name, type(module).__name__)) + if hasattr(module, "state_dict"): + for k, v in module.state_dict().items(): + if hasattr(v, "shape"): + logger.info(" | | |-- {} : {}".format(k, list(v.shape))) + else: + logger.info(" | | |-- {} ({})".format(k, type(v).__name__)) + logger.info(" |-- source") + for name, module in checkpoint.items(): + logger.info(" | |-- {} ({})".format(name, type(module).__name__)) + if name == "model": + for k, v in module.items(): + logger.info(" | | |-- {} : {}".format(k, list(v.shape))) + else: + if hasattr(module, "state_dict"): + for k, v in module.state_dict().items(): + if hasattr(v, "shape"): + logger.info(" | | |-- {} : {}".format(k, list(v.shape))) + else: + logger.info(" | | |-- {} ({})".format(k, type(v).__name__)) + logger.info("Migration") + + if "optimizer" in self.checkpoint_modules: + # loaded state dict contains a parameter group that doesn't match the size of optimizer's group + # self.checkpoint_modules["optimizer"].load_state_dict(checkpoint["optimizer"]) + pass + # state_preprocessor + if "state_preprocessor" in self.checkpoint_modules: + if "running_mean_std.running_mean" in checkpoint["model"]: + state_dict = copy.deepcopy(self.checkpoint_modules["state_preprocessor"].state_dict()) + state_dict["running_mean"] = checkpoint["model"]["running_mean_std.running_mean"] + state_dict["running_variance"] = checkpoint["model"]["running_mean_std.running_var"] + state_dict["current_count"] = 
checkpoint["model"]["running_mean_std.count"] + self.checkpoint_modules["state_preprocessor"].load_state_dict(state_dict) + del checkpoint["model"]["running_mean_std.running_mean"] + del checkpoint["model"]["running_mean_std.running_var"] + del checkpoint["model"]["running_mean_std.count"] + # value_preprocessor + if "value_preprocessor" in self.checkpoint_modules: + if "value_mean_std.running_mean" in checkpoint["model"]: + state_dict = copy.deepcopy(self.checkpoint_modules["value_preprocessor"].state_dict()) + state_dict["running_mean"] = checkpoint["model"]["value_mean_std.running_mean"] + state_dict["running_variance"] = checkpoint["model"]["value_mean_std.running_var"] + state_dict["current_count"] = checkpoint["model"]["value_mean_std.count"] + self.checkpoint_modules["value_preprocessor"].load_state_dict(state_dict) + del checkpoint["model"]["value_mean_std.running_mean"] + del checkpoint["model"]["value_mean_std.running_var"] + del checkpoint["model"]["value_mean_std.count"] + # TODO: AMP state preprocessor + # model + status = True + for name, module in self.checkpoint_modules.items(): + if module not in ["state_preprocessor", "value_preprocessor", "optimizer"] and hasattr(module, "migrate"): + if verbose: + logger.info("Model: {} ({})".format(name, type(module).__name__)) + status *= module.migrate(state_dict=checkpoint["model"], + name_map=name_map.get(name, {}), + auto_mapping=auto_mapping, + verbose=verbose) + + self.set_mode("eval") + return bool(status) + def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 9973cc4d..73478f36 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -549,10 +549,10 @@ def migrate(self, logger.info("Models") logger.info(" |-- current: {} items".format(len(self.state_dict().keys()))) for name, tensor in self.state_dict().items(): - logger.info(" | |-- {} : {}".format(name, tensor.shape)) + logger.info(" | |-- {} : {}".format(name, list(tensor.shape))) logger.info(" |-- source: {} items".format(len(state_dict.keys()))) for name, tensor in state_dict.items(): - logger.info(" | |-- {} : {}".format(name, tensor.shape)) + logger.info(" | |-- {} : {}".format(name, list(tensor.shape))) logger.info("Migration") # migrate the state_dict to current model From ec243b255b3c3f1248658d4fc5a012de377d89e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 7 Sep 2022 22:31:00 +0200 Subject: [PATCH 058/108] Remove model instantiator properties and methods from models --- skrl/models/torch/base.py | 39 ---------------------- skrl/models/torch/categorical.py | 8 ++--- skrl/models/torch/deterministic.py | 8 ++--- skrl/models/torch/gaussian.py | 11 ++---- skrl/models/torch/multivariate_gaussian.py | 11 ++---- 5 files changed, 10 insertions(+), 67 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 73478f36..06cfadf6 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -61,45 +61,6 @@ def act(self, states, taken_actions=None, role=""): self._random_distribution = None - # internal variables to be used by the model instantiators - self._instantiator_net = None - self._instantiator_input_type = 0 - self._instantiator_parameter = None - self._instantiator_output_scale = 1.0 - - def _get_instantiator_output(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None) -> Sequence[torch.Tensor]: - """Get 
the output of the instantiator model - - Input shape depends on the instantiator (see skrl.utils.model_instantiator.Shape) as follows: - - - STATES / OBSERVATIONS = 0 - - ACTIONS = -1 - - STATES_ACTIONS = -2 - - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``) - :type taken_actions: torch.Tensor, optional - - :return: Output of the instantiator model - :rtype: sequence of torch.Tensor - """ - if self._instantiator_input_type == 0: - output = self._instantiator_net(states) - elif self._instantiator_input_type == -1: - output = self._instantiator_net(taken_actions) - elif self._instantiator_input_type == -2: - output = self._instantiator_net(torch.cat((states, taken_actions), dim=1)) - - # deterministic and categorical output - if self._instantiator_parameter is None: - return output * self._instantiator_output_scale - # gaussian output - else: - return output * self._instantiator_output_scale, self._instantiator_parameter - def _get_space_size(self, space: Union[int, Sequence[int], gym.Space], number_of_elements: bool = True) -> int: diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index 5c779510..19f94f10 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -86,12 +86,8 @@ def act(self, torch.Size([4096, 1]) torch.Size([4096, 1]) torch.Size([4096, 2]) """ # map from states/observations to normalized probabilities or unnormalized log probabilities - if self._instantiator_net is None: - output = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) - else: - output = self._get_instantiator_output(states.to(self.device), \ - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + output = self.compute(states.to(self.device), + taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) # unnormalized log probabilities if self._c_unnormalized_log_prob[role] if role in self._c_unnormalized_log_prob else self._c_unnormalized_log_prob[""]: diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index bc863320..72996d96 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -87,12 +87,8 @@ def act(self, torch.Size([4096, 1]) None None """ # map from observations/states to actions - if self._instantiator_net is None: - actions = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) - else: - actions = self._get_instantiator_output(states.to(self.device), \ - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + actions = self.compute(states.to(self.device), + taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) # clip actions if self._d_clip_actions[role] if role in self._d_clip_actions else self._d_clip_actions[""]: diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index c06fe570..8e69a343 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -135,14 +135,9 @@ def act(self, torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations - if self._instantiator_net is None: - actions_mean, log_std = self.compute(states.to(self.device), - 
taken_actions.to(self.device) if taken_actions is not None else taken_actions, - role) - else: - actions_mean, log_std = self._get_instantiator_output(states.to(self.device), \ - taken_actions.to(self.device) if taken_actions is not None else taken_actions) - + actions_mean, log_std = self.compute(states.to(self.device), + taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) + # clamp log standard deviations if self._g_clip_log_std[role] if role in self._g_clip_log_std else self._g_clip_log_std[""]: log_std = torch.clamp(log_std, diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index cc65b7c9..b4d6e7b3 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -121,14 +121,9 @@ def act(self, torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations - if self._instantiator_net is None: - actions_mean, log_std = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, - role) - else: - actions_mean, log_std = self._get_instantiator_output(states.to(self.device), \ - taken_actions.to(self.device) if taken_actions is not None else taken_actions) - + actions_mean, log_std = self.compute(states.to(self.device), + taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) + # clamp log standard deviations if self._mg_clip_log_std[role] if role in self._mg_clip_log_std else self._mg_clip_log_std[""]: log_std = torch.clamp(log_std, From 52f9e708a42af674366ce039afbe97462a88f31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 7 Sep 2022 22:37:09 +0200 Subject: [PATCH 059/108] Update and autocontain model instances --- skrl/utils/model_instantiators.py | 294 +++++++++++++++++++++++------- 1 file changed, 226 insertions(+), 68 deletions(-) diff --git a/skrl/utils/model_instantiators.py b/skrl/utils/model_instantiators.py index d6b2da53..a4626bf6 100644 --- a/skrl/utils/model_instantiators.py +++ b/skrl/utils/model_instantiators.py @@ -7,9 +7,10 @@ import torch.nn as nn from ..models.torch import Model -from ..models.torch import GaussianModel -from ..models.torch import CategoricalModel -from ..models.torch import DeterministicModel +from ..models.torch import GaussianMixin +from ..models.torch import CategoricalMixin +from ..models.torch import DeterministicMixin +from ..models.torch import MultivariateGaussianMixin __all__ = ["categorical_model", "deterministic_model", "gaussian_model", "Shape"] @@ -92,7 +93,7 @@ def _generate_sequential(model: Model, hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, output_activation: Union[str, None] = "tanh", - output_scale: int = None,) -> nn.Sequential: + output_scale: int = None) -> nn.Sequential: """Generate a sequential model :param model: model to generate sequential model for @@ -141,8 +142,8 @@ def gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, output_activation: Union[str, None] = "tanh", - output_scale: float = 1.0) -> GaussianModel: - """Instantiate a GaussianModel model + output_scale: float = 1.0) -> Model: + """Instantiate a Gaussian model :param observation_space: Observation/state space or shape (default: None). 
If it is not None, the num_observations property will contain the size of that space @@ -174,30 +175,145 @@ def gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = If None, the output layer will not be scaled :type output_scale: float, optional - :return: GaussianModel instance - :rtype: GaussianModel + :return: Gaussian model instance + :rtype: Model """ - model = GaussianModel(observation_space=observation_space, - action_space=action_space, - device=device, - clip_actions=clip_actions, - clip_log_std=clip_log_std, - min_log_std=min_log_std, - max_log_std=max_log_std) - - model._instantiator_net = _generate_sequential(model=model, - input_shape=input_shape, - hiddens=hiddens, - hidden_activation=hidden_activation, - output_shape=output_shape, - output_activation=output_activation, - output_scale=output_scale) - model._instantiator_output_scale = output_scale - model._instantiator_input_type = input_shape.value - model._instantiator_parameter = nn.Parameter(torch.zeros(_get_num_units_by_shape(model, output_shape))) - - return model + class GaussianModel(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions, + clip_log_std, min_log_std, max_log_std, metadata): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.instantiator_output_scale = metadata["output_scale"] + self.instantiator_input_type = metadata["input_shape"].value + + self.net = _generate_sequential(model=self, + input_shape=metadata["input_shape"], + hiddens=metadata["hiddens"], + hidden_activation=metadata["hidden_activation"], + output_shape=metadata["output_shape"], + output_activation=metadata["output_activation"], + output_scale=metadata["output_scale"]) + self.log_std_parameter = nn.Parameter(torch.zeros(_get_num_units_by_shape(self, metadata["output_shape"]))) + + def compute(self, states, taken_actions=None, role=""): + if self.instantiator_input_type == 0: + output = self.net(states) + elif self.instantiator_input_type == -1: + output = self.net(taken_actions) + elif self.instantiator_input_type == -2: + output = self.net(torch.cat((states, taken_actions), dim=1)) + + return output * self.instantiator_output_scale, self.log_std_parameter + + metadata = {"input_shape": input_shape, + "hiddens": hiddens, + "hidden_activation": hidden_activation, + "output_shape": output_shape, + "output_activation": output_activation, + "output_scale": output_scale} + + return GaussianModel(observation_space=observation_space, + action_space=action_space, + device=device, + clip_actions=clip_actions, + clip_log_std=clip_log_std, + min_log_std=min_log_std, + max_log_std=max_log_std, + metadata=metadata) +def multivariate_gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", + clip_actions: bool = False, + clip_log_std: bool = True, + min_log_std: float = -20, + max_log_std: float = 2, + input_shape: Shape = Shape.STATES, + hiddens: list = [256, 256], + hidden_activation: list = ["relu", "relu"], + output_shape: Shape = Shape.ACTIONS, + output_activation: Union[str, None] = "tanh", + output_scale: float = 1.0) -> Model: + """Instantiate a multivariate Gaussian model + + :param observation_space: Observation/state space or shape (default: None). 
+ If it is not None, the num_observations property will contain the size of that space + :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :param action_space: Action space or shape (default: None). + If it is not None, the num_actions property will contain the size of that space + :type action_space: int, tuple or list of integers, gym.Space or None, optional + :param device: Device on which the model will be trained (default: "cuda:0") + :type device: str or torch.device, optional + :param clip_actions: Flag to indicate whether the actions should be clipped (default: False) + :type clip_actions: bool, optional + :param clip_log_std: Flag to indicate whether the log standard deviations should be clipped (default: True) + :type clip_log_std: bool, optional + :param min_log_std: Minimum value of the log standard deviation (default: -20) + :type min_log_std: float, optional + :param max_log_std: Maximum value of the log standard deviation (default: 2) + :type max_log_std: float, optional + :param input_shape: Shape of the input (default: Shape.STATES) + :type input_shape: Shape, optional + :param hiddens: Number of hidden units in each hidden layer + :type hiddens: int or list of ints + :param hidden_activation: Activation function for each hidden layer (default: "relu"). + :type hidden_activation: list of strings + :param output_shape: Shape of the output (default: Shape.ACTIONS) + :type output_shape: Shape, optional + :param output_activation: Activation function for the output layer (default: "tanh") + :type output_activation: str or None, optional + :param output_scale: Scale of the output layer (default: 1.0). + If None, the output layer will not be scaled + :type output_scale: float, optional + + :return: Multivariate Gaussian model instance + :rtype: Model + """ + class MultivariateGaussianModel(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions, + clip_log_std, min_log_std, max_log_std, metadata): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.instantiator_output_scale = metadata["output_scale"] + self.instantiator_input_type = metadata["input_shape"].value + + self.net = _generate_sequential(model=self, + input_shape=metadata["input_shape"], + hiddens=metadata["hiddens"], + hidden_activation=metadata["hidden_activation"], + output_shape=metadata["output_shape"], + output_activation=metadata["output_activation"], + output_scale=metadata["output_scale"]) + self.log_std_parameter = nn.Parameter(torch.zeros(_get_num_units_by_shape(self, metadata["output_shape"]))) + + def compute(self, states, taken_actions=None, role=""): + if self.instantiator_input_type == 0: + output = self.net(states) + elif self.instantiator_input_type == -1: + output = self.net(taken_actions) + elif self.instantiator_input_type == -2: + output = self.net(torch.cat((states, taken_actions), dim=1)) + + return output * self.instantiator_output_scale, self.log_std_parameter + + metadata = {"input_shape": input_shape, + "hiddens": hiddens, + "hidden_activation": hidden_activation, + "output_shape": output_shape, + "output_activation": output_activation, + "output_scale": output_scale} + + return MultivariateGaussianModel(observation_space=observation_space, + action_space=action_space, + device=device, + clip_actions=clip_actions, + clip_log_std=clip_log_std, + min_log_std=min_log_std, + 
max_log_std=max_log_std, + metadata=metadata) + def deterministic_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", @@ -207,8 +323,8 @@ def deterministic_model(observation_space: Union[int, Tuple[int], gym.Space, Non hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, output_activation: Union[str, None] = "tanh", - output_scale: float = 1.0) -> DeterministicModel: - """Instantiate a DeterministicModel model + output_scale: float = 1.0) -> Model: + """Instantiate a deterministic model :param observation_space: Observation/state space or shape (default: None). If it is not None, the num_observations property will contain the size of that space @@ -234,26 +350,48 @@ def deterministic_model(observation_space: Union[int, Tuple[int], gym.Space, Non If None, the output layer will not be scaled :type output_scale: float, optional - :return: DeterministicModel instance - :rtype: DeterministicModel + :return: Deterministic model instance + :rtype: Model """ - model = DeterministicModel(observation_space=observation_space, - action_space=action_space, - device=device, - clip_actions=clip_actions) - - model._instantiator_net = _generate_sequential(model=model, - input_shape=input_shape, - hiddens=hiddens, - hidden_activation=hidden_activation, - output_shape=output_shape, - output_activation=output_activation, - output_scale=output_scale) - model._instantiator_output_scale = output_scale - model._instantiator_input_type = input_shape.value - - return model - + class DeterministicModel(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions, metadata): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.instantiator_output_scale = metadata["output_scale"] + self.instantiator_input_type = metadata["input_shape"].value + + self.net = _generate_sequential(model=self, + input_shape=metadata["input_shape"], + hiddens=metadata["hiddens"], + hidden_activation=metadata["hidden_activation"], + output_shape=metadata["output_shape"], + output_activation=metadata["output_activation"], + output_scale=metadata["output_scale"]) + + def compute(self, states, taken_actions=None, role=""): + if self.instantiator_input_type == 0: + output = self.net(states) + elif self.instantiator_input_type == -1: + output = self.net(taken_actions) + elif self.instantiator_input_type == -2: + output = self.net(torch.cat((states, taken_actions), dim=1)) + + return output * self.instantiator_output_scale + + metadata = {"input_shape": input_shape, + "hiddens": hiddens, + "hidden_activation": hidden_activation, + "output_shape": output_shape, + "output_activation": output_activation, + "output_scale": output_scale} + + return DeterministicModel(observation_space=observation_space, + action_space=action_space, + device=device, + clip_actions=clip_actions, + metadata=metadata) + def categorical_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", @@ -262,8 +400,8 @@ def categorical_model(observation_space: Union[int, Tuple[int], gym.Space, None] hiddens: list = [256, 256], hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, - output_activation: Union[str, None] = None) -> CategoricalModel: - """Instantiate a 
CategoricalModel model + output_activation: Union[str, None] = None) -> Model: + """Instantiate a categorical model :param observation_space: Observation/state space or shape (default: None). If it is not None, the num_observations property will contain the size of that space @@ -289,21 +427,41 @@ def categorical_model(observation_space: Union[int, Tuple[int], gym.Space, None] :param output_activation: Activation function for the output layer (default: None) :type output_activation: str or None, optional - :return: CategoricalModel instance - :rtype: CategoricalModel + :return: Categorical model instance + :rtype: Model """ - model = CategoricalModel(observation_space=observation_space, - action_space=action_space, - device=device, - unnormalized_log_prob=unnormalized_log_prob) - - model._instantiator_net = _generate_sequential(model=model, - input_shape=input_shape, - hiddens=hiddens, - hidden_activation=hidden_activation, - output_shape=output_shape, - output_activation=output_activation) - model._instantiator_input_type = input_shape.value - - return model - \ No newline at end of file + class CategoricalModel(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob, metadata): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.instantiator_input_type = metadata["input_shape"].value + + self.net = _generate_sequential(model=self, + input_shape=metadata["input_shape"], + hiddens=metadata["hiddens"], + hidden_activation=metadata["hidden_activation"], + output_shape=metadata["output_shape"], + output_activation=metadata["output_activation"]) + + def compute(self, states, taken_actions=None, role=""): + if self.instantiator_input_type == 0: + output = self.net(states) + elif self.instantiator_input_type == -1: + output = self.net(taken_actions) + elif self.instantiator_input_type == -2: + output = self.net(torch.cat((states, taken_actions), dim=1)) + + return output + + metadata = {"input_shape": input_shape, + "hiddens": hiddens, + "hidden_activation": hidden_activation, + "output_shape": output_shape, + "output_activation": output_activation} + + return CategoricalModel(observation_space=observation_space, + action_space=action_space, + device=device, + unnormalized_log_prob=unnormalized_log_prob, + metadata=metadata) From acd81ae0f1c46a3568570e3edc9f1807e4cfd592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 7 Sep 2022 22:49:15 +0200 Subject: [PATCH 060/108] Expose multivariate Gaussian instantiator and add it to docs --- docs/source/modules/skrl.utils.model_instantiators.rst | 4 +++- skrl/utils/model_instantiators.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/modules/skrl.utils.model_instantiators.rst b/docs/source/modules/skrl.utils.model_instantiators.rst index 005dfc94..536f4d04 100644 --- a/docs/source/modules/skrl.utils.model_instantiators.rst +++ b/docs/source/modules/skrl.utils.model_instantiators.rst @@ -36,6 +36,8 @@ API .. autofunction:: skrl.utils.model_instantiators.categorical_model +.. autofunction:: skrl.utils.model_instantiators.deterministic_model + .. autofunction:: skrl.utils.model_instantiators.gaussian_model -.. autofunction:: skrl.utils.model_instantiators.deterministic_model +.. 
autofunction:: skrl.utils.model_instantiators.multivariate_gaussian_model diff --git a/skrl/utils/model_instantiators.py b/skrl/utils/model_instantiators.py index a4626bf6..86b64294 100644 --- a/skrl/utils/model_instantiators.py +++ b/skrl/utils/model_instantiators.py @@ -12,7 +12,7 @@ from ..models.torch import DeterministicMixin from ..models.torch import MultivariateGaussianMixin -__all__ = ["categorical_model", "deterministic_model", "gaussian_model", "Shape"] +__all__ = ["categorical_model", "deterministic_model", "gaussian_model", "multivariate_gaussian_model", "Shape"] class Shape(Enum): From 7d8c8f96831a49e3886359a9fbd714be655a53e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 10 Sep 2022 12:11:52 +0200 Subject: [PATCH 061/108] Add packaging to library dependencies --- docs/requirements.txt | 1 + docs/source/intro/installation.rst | 1 + setup.py | 1 + 3 files changed, 3 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 65315ffa..dc80cf40 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -6,3 +6,4 @@ gym torch tensorboard tqdm +packaging diff --git a/docs/source/intro/installation.rst b/docs/source/intro/installation.rst index eaf11187..cd5e6bdb 100644 --- a/docs/source/intro/installation.rst +++ b/docs/source/intro/installation.rst @@ -12,6 +12,7 @@ Prerequisites * `gym `_ * `tqdm `_ + * `packaging `_ * `torch `_ 1.8.0 or higher * `tensorboard `_ diff --git a/setup.py b/setup.py index d9bf4aa1..bbf87912 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ "torch", "tensorboard", "tqdm", + "packaging", ] # installation From a75d6d2533d1831ad8058cce0843d6facf90235a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 10 Sep 2022 12:37:44 +0200 Subject: [PATCH 062/108] Fix Gym's environment wrapper for the new API --- skrl/envs/torch/wrappers.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index c4dfb43c..2ab26175 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -3,9 +3,12 @@ import gym import collections import numpy as np +from packaging import version import torch +from skrl import logger + __all__ = ["wrap_env"] @@ -271,10 +274,9 @@ def __init__(self, env: Any) -> None: except Exception as e: print("[WARNING] Failed to check for a vectorized environment: {}".format(e)) - if hasattr(self, "new_step_api"): - self._new_step_api = self._env.new_step_api - else: - self._new_step_api = False + self._drepecated_api = version.parse(gym.__version__) < version.parse(" 0.25.0") + if self._drepecated_api: + logger.warning("Using a deprecated version of OpenAI Gym's API: {}".format(gym.__version__)) @property def state_space(self) -> gym.Space: @@ -346,8 +348,11 @@ def _tensor_to_action(self, actions: torch.Tensor) -> Any: """ space = self._env.action_space if self._vectorized else self.action_space - if self._vectorized and isinstance(space, gym.spaces.MultiDiscrete): - return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape) + if self._vectorized: + if isinstance(space, gym.spaces.MultiDiscrete): + return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape) + elif isinstance(space, gym.spaces.Tuple): + return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(space.shape) elif isinstance(space, gym.spaces.Discrete): return actions.item() elif isinstance(space, gym.spaces.Box): @@ 
-364,11 +369,14 @@ def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch :return: The state, the reward, the done flag, and the info :rtype: tuple of torch.Tensor and any other info """ - if self._new_step_api: - observation, reward, termination, truncation, info = self._env.step(self._tensor_to_action(actions)) - done = termination or truncation - else: + if self._drepecated_api: observation, reward, done, info = self._env.step(self._tensor_to_action(actions)) + else: + observation, reward, termination, truncation, info = self._env.step(self._tensor_to_action(actions)) + if type(termination) is bool: + done = termination or truncation + else: + done = np.logical_or(termination, truncation) # convert response to torch return self._observation_to_tensor(observation), \ torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ @@ -381,7 +389,10 @@ def reset(self) -> torch.Tensor: :return: The state of the environment :rtype: torch.Tensor """ - observation = self._env.reset() + if self._drepecated_api: + observation = self._env.reset() + else: + observation, info = self._env.reset() return self._observation_to_tensor(observation) def render(self, *args, **kwargs) -> None: From 6f8443e8fa6630e6508ba647dd2e5958cb963536 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 10 Sep 2022 17:40:26 +0200 Subject: [PATCH 063/108] Convert tabular model class to mixin --- skrl/models/torch/__init__.py | 2 +- skrl/models/torch/tabular.py | 167 ++++++++++++++++++++++++++-------- 2 files changed, 130 insertions(+), 39 deletions(-) diff --git a/skrl/models/torch/__init__.py b/skrl/models/torch/__init__.py index ed7b6389..9a4e879c 100644 --- a/skrl/models/torch/__init__.py +++ b/skrl/models/torch/__init__.py @@ -1,6 +1,6 @@ from .base import Model -from .tabular import TabularModel +from .tabular import TabularMixin from .gaussian import GaussianMixin from .categorical import CategoricalMixin from .deterministic import DeterministicMixin diff --git a/skrl/models/torch/tabular.py b/skrl/models/torch/tabular.py index b2206f83..8fe88279 100644 --- a/skrl/models/torch/tabular.py +++ b/skrl/models/torch/tabular.py @@ -1,40 +1,67 @@ -from typing import Union, Tuple - -import gym +from typing import Optional, Mapping, Sequence import torch from . import Model -class TabularModel(Model): - def __init__(self, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", - num_envs: int = 1) -> None: - """Tabular model - - :param observation_space: Observation/state space or shape (default: None). - If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional - :param action_space: Action space or shape (default: None). 
- If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") - :type device: str or torch.device, optional +class TabularMixin: + def __init__(self, num_envs: int = 1, role: str = "") -> None: + """Tabular mixin model + :param num_envs: Number of environments (default: 1) :type num_envs: int, optional + :param role: Role played by the model (default: ``""``) + :type role: str, optional + + Example:: + + # define the model + >>> import torch + >>> from skrl.models.torch import Model, TabularMixin + >>> + >>> class GreedyPolicy(TabularMixin, Model): + ... def __init__(self, observation_space, action_space, device="cuda:0", num_envs=1): + ... Model.__init__(self, observation_space, action_space, device) + ... TabularMixin.__init__(self, num_envs) + ... + ... self.table = torch.ones((num_envs, self.num_observations, self.num_actions), + ... dtype=torch.float32, device=self.device) + ... + ... def compute(self, states, taken_actions, role): + ... actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), states], + ... dim=-1, keepdim=True).view(-1,1) + ... + >>> # given an observation_space: gym.spaces.Discrete with n=100 + >>> # and an action_space: gym.spaces.Discrete with n=5 + >>> model = GreedyPolicy(observation_space, action_space, num_envs=1) + >>> + >>> print(model) + GreedyPolicy( + (table): Tensor(shape=[1, 100, 5]) + ) """ - super(TabularModel, self).__init__(observation_space, action_space, device) - self.num_envs = num_envs - def _get_tensor_names(self) -> Tuple[str]: + def __repr__(self) -> str: + """String representation of an object as torch.nn.Module + """ + lines = [] + for name in self._get_tensor_names(): + tensor = getattr(self, name) + lines.append("({}): {}(shape={})".format(name, tensor.__class__.__name__, list(tensor.shape))) + + main_str = self.__class__.__name__ + '(' + if lines: + main_str += "\n {}\n".format("\n ".join(lines)) + main_str += ')' + return main_str + + def _get_tensor_names(self) -> Sequence[str]: """Get the names of the tensors that the model is using :return: Tensor names - :rtype: tuple of str + :rtype: sequence of str """ tensors = [] for attr in dir(self): @@ -44,24 +71,31 @@ def _get_tensor_names(self) -> Tuple[str]: def act(self, states: torch.Tensor, - taken_actions: Union[torch.Tensor, None] = None, - inference=False) -> Tuple[torch.Tensor]: + taken_actions: Optional[torch.Tensor] = None, + role: str = "") -> Sequence[torch.Tensor]: """Act in response to the state of the environment :param states: Observation/state of the environment used to make the decision :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None) - :type taken_actions: torch.Tensor or None, optional - :param inference: Flag to indicate whether the model is making inference (default: False). - If True, the returned tensors will be detached from the current graph - :type inference: bool, optional + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). + The use of these actions only makes sense in critic models + :type taken_actions: torch.Tensor, optional + :param role: Role played by the model (default: ``""``) + :type role: str, optional :return: Action to be taken by the agent given the state of the environment. 
- The tuple's components are the computed actions and None for the last two components - :rtype: tuple of torch.Tensor + The sequence's components are the computed actions and None for the last two components + :rtype: sequence of torch.Tensor + + Example:: + + >>> # given a batch of sample states with shape (1, 100) + >>> output = model.act(states) + >>> print(output[0], output[1], output[2]) + tensor([[3]], device='cuda:0') None None """ actions = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions) + taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) return actions, None, None def table(self) -> torch.Tensor: @@ -69,6 +103,12 @@ def table(self) -> torch.Tensor: :return: Q-table :rtype: torch.Tensor + + Example:: + + >>> output = model.table() + >>> print(output.shape) + torch.Size([1, 100, 5]) """ return self.q_table @@ -83,29 +123,80 @@ def to(self, *args, **kwargs) -> Model: :return: Model moved to the specified device :rtype: Model """ - super(TabularModel, self).to(*args, **kwargs) + Model.to(self, *args, **kwargs) for name in self._get_tensor_names(): setattr(self, name, getattr(self, name).to(*args, **kwargs)) return self - def save(self, path: str, state_dict: Union[dict, None] = None) -> None: + def state_dict(self, *args, **kwargs) -> Mapping: + """Returns a dictionary containing a whole state of the module + + :return: A dictionary containing a whole state of the module + :rtype: dict + """ + _state_dict = {name: getattr(self, name) for name in self._get_tensor_names()} + Model.state_dict(self, destination=_state_dict) + return _state_dict + + def load_state_dict(self, state_dict: Mapping, strict: bool = True) -> None: + """Copies parameters and buffers from state_dict into this module and its descendants + + :param state_dict: A dict containing parameters and persistent buffers + :type state_dict: dict + :param strict: Whether to strictly enforce that the keys in state_dict match the keys + returned by this module's state_dict() function (default: ``True``) + :type strict: bool, optional + """ + Model.load_state_dict(self, state_dict, strict=False) + + for name, tensor in state_dict.items(): + if hasattr(self, name) and isinstance(getattr(self, name), torch.Tensor): + _tensor = getattr(self, name) + if isinstance(_tensor, torch.Tensor): + if _tensor.shape == tensor.shape and _tensor.dtype == tensor.dtype: + setattr(self, name, tensor) + else: + raise ValueError("Tensor shape ({} vs {}) or dtype ({} vs {}) mismatch"\ + .format(_tensor.shape, tensor.shape, _tensor.dtype, tensor.dtype)) + else: + raise ValueError("{} is not a tensor of {}".format(name, self.__class__.__name__)) + + def save(self, path: str, state_dict: Optional[dict] = None) -> None: """Save the model to the specified path :param path: Path to save the model to :type path: str - :param state_dict: State dictionary to save (default: None). + :param state_dict: State dictionary to save (default: ``None``). 
If None, the model's state_dict will be saved :type state_dict: dict, optional + + Example:: + + # save the current model to the specified path + >>> model.save("/tmp/model.pt") """ + # TODO: save state_dict torch.save({name: getattr(self, name) for name in self._get_tensor_names()}, path) def load(self, path: str) -> None: """Load the model from the specified path - - :raises ValueError: If the models are not compatible + + The final storage device is determined by the constructor of the model :param path: Path to load the model from :type path: str + + :raises ValueError: If the models are not compatible + + Example:: + + # load the model onto the CPU + >>> model = Model(observation_space, action_space, device="cpu") + >>> model.load("model.pt") + + # load the model onto the GPU 1 + >>> model = Model(observation_space, action_space, device="cuda:1") + >>> model.load("model.pt") """ tensors = torch.load(path) for name, tensor in tensors.items(): From 349f699c38c835062b72afa6657afdb399939ed3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 10 Sep 2022 17:41:42 +0200 Subject: [PATCH 064/108] Update tabular model in docs --- docs/source/modules/skrl.models.tabular.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/modules/skrl.models.tabular.rst b/docs/source/modules/skrl.models.tabular.rst index 25b27bba..774b338a 100644 --- a/docs/source/modules/skrl.models.tabular.rst +++ b/docs/source/modules/skrl.models.tabular.rst @@ -4,7 +4,7 @@ Tabular model ============= Basic usage -^^^^^^^^^^^ +----------- .. tabs:: @@ -17,11 +17,11 @@ Basic usage :end-before: [end-epsilon-greedy] API -^^^ +--- -.. autoclass:: skrl.models.torch.tabular.TabularModel - :show-inheritance: - :members: - - .. automethod:: __init__ - .. automethod:: compute +.. autoclass:: skrl.models.torch.tabular.TabularMixin + :show-inheritance: + :exclude-members: to, state_dict, load_state_dict, load, save + :members: + + .. 
automethod:: __init__ From 4506c6018768e045993b8ce83748587dcfa22a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 10 Sep 2022 23:36:52 +0200 Subject: [PATCH 065/108] Update OpenAI Gym examples --- docs/source/examples/gym/gym_cartpole_cem.py | 82 +++++++++++++++++++ .../examples/gym/gym_cartpole_cem_eval.py | 75 +++++++++++++++++ docs/source/examples/gym/gym_cartpole_dqn.py | 44 +++++----- .../examples/gym/gym_cartpole_dqn_eval.py | 28 +++---- .../gym/gym_frozen_lake_q_learning.py | 26 +++--- .../gym/gym_frozen_lake_q_learning_eval.py | 28 ++++--- docs/source/examples/gym/gym_pendulum_ddpg.py | 50 +++++------ .../examples/gym/gym_pendulum_ddpg_eval.py | 24 +++--- docs/source/examples/gym/gym_taxi_sarsa.py | 26 +++--- .../examples/gym/gym_taxi_sarsa_eval.py | 26 +++--- .../examples/gym/gym_vector_cartpole_dqn.py | 44 +++++----- .../gym/gym_vector_frozen_lake_q_learning.py | 36 ++++---- .../examples/gym/gym_vector_pendulum_ddpg.py | 46 ++++++----- .../examples/gym/gym_vector_taxi_sarsa.py | 26 +++--- 14 files changed, 361 insertions(+), 200 deletions(-) create mode 100644 docs/source/examples/gym/gym_cartpole_cem.py create mode 100644 docs/source/examples/gym/gym_cartpole_cem_eval.py diff --git a/docs/source/examples/gym/gym_cartpole_cem.py b/docs/source/examples/gym/gym_cartpole_cem.py new file mode 100644 index 00000000..ac78940b --- /dev/null +++ b/docs/source/examples/gym/gym_cartpole_cem.py @@ -0,0 +1,82 @@ +import gym + +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, CategoricalMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.cem import CEM, CEM_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (categorical model) for the CEM agent using mixin +# - Policy: takes as input the environment's observation/state and returns an action +class Policy(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.linear_layer_1 = nn.Linear(self.num_observations, 64) + self.linear_layer_2 = nn.Linear(64, 64) + self.output_layer = nn.Linear(64, self.num_actions) + + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(states)) + x = F.relu(self.linear_layer_2(x)) + return self.output_layer(x) + + +# Load and wrap the Gym environment. +# Note: the environment version may change depending on the gym version +try: + env = gym.make("CartPole-v0") +except gym.error.DeprecatedEnv as e: + env_id = [spec.id for spec in gym.envs.registry.all() if spec.id.startswith("CartPole-v")][0] + print("CartPole-v0 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=1000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's model (function approximator). 
+# CEM requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.cem.html#spaces-and-models +models_cem = {} +models_cem["policy"] = Policy(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_cem.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.cem.html#configuration-and-hyperparameters +cfg_cem = CEM_DEFAULT_CONFIG.copy() +cfg_cem["rollouts"] = 1000 +cfg_cem["learning_starts"] = 100 +# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively +cfg_cem["experiment"]["write_interval"] = 1000 +cfg_cem["experiment"]["checkpoint_interval"] = 5000 + +agent_cem = CEM(models=models_cem, + memory=memory, + cfg=cfg_cem, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(env=env, agents=[agent_cem], cfg=cfg_trainer) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/gym_cartpole_cem_eval.py b/docs/source/examples/gym/gym_cartpole_cem_eval.py new file mode 100644 index 00000000..72a3633e --- /dev/null +++ b/docs/source/examples/gym/gym_cartpole_cem_eval.py @@ -0,0 +1,75 @@ +import gym + +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, CategoricalMixin +from skrl.agents.torch.cem import CEM, CEM_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (categorical model) for the CEM agent using mixin +# - Policy: takes as input the environment's observation/state and returns an action +class Policy(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.linear_layer_1 = nn.Linear(self.num_observations, 64) + self.linear_layer_2 = nn.Linear(64, 64) + self.output_layer = nn.Linear(64, self.num_actions) + + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(states)) + x = F.relu(self.linear_layer_2(x)) + return self.output_layer(x) + + +# Load and wrap the Gym environment. +# Note: the environment version may change depending on the gym version +try: + env = gym.make("CartPole-v0") +except gym.error.DeprecatedEnv as e: + env_id = [spec.id for spec in gym.envs.registry.all() if spec.id.startswith("CartPole-v")][0] + print("CartPole-v0 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's model (function approximators). +# CEM requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.cem.html#spaces-and-models +models_cem = {} +models_cem["policy"] = Policy(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.cem.html#configuration-and-hyperparameters +cfg_cem = CEM_DEFAULT_CONFIG.copy() +cfg_cem["rollouts"] = 1000 +cfg_cem["learning_starts"] = 100 +# logging to TensorBoard each 1000 timesteps and ignore checkpoints +cfg_cem["experiment"]["write_interval"] = 1000 +cfg_cem["experiment"]["checkpoint_interval"] = 0 + +agent_cem = CEM(models=models_cem, + memory=None, + cfg=cfg_cem, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoint +agent_cem.load("./runs/22-09-07_21-41-05-854385_CEM/checkpoints/best_agent.pt") + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(env=env, agents=[agent_cem], cfg=cfg_trainer) + +# evaluate the agent +trainer.eval() diff --git a/docs/source/examples/gym/gym_cartpole_dqn.py b/docs/source/examples/gym/gym_cartpole_dqn.py index cb08ae85..98c7f6fe 100644 --- a/docs/source/examples/gym/gym_cartpole_dqn.py +++ b/docs/source/examples/gym/gym_cartpole_dqn.py @@ -22,32 +22,33 @@ # Instantiate a RandomMemory (without replacement) as experience replay memory -memory = RandomMemory(memory_size=100000, num_envs=env.num_envs, device=device, replacement=False) +memory = RandomMemory(memory_size=50000, num_envs=env.num_envs, device=device, replacement=False) # Instantiate the agent's models (function approximators) using the model instantiator utility # DQN requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models -models_dqn = {"q_network": deterministic_model(observation_space=env.observation_space, - action_space=env.action_space, - device=device, - clip_actions=False, - input_shape=Shape.OBSERVATIONS, - hiddens=[64, 64], - hidden_activation=["relu", "relu"], - output_shape=Shape.ACTIONS, - output_activation=None, - output_scale=1.0), - "target_q_network": deterministic_model(observation_space=env.observation_space, - action_space=env.action_space, - device=device, - clip_actions=False, - input_shape=Shape.OBSERVATIONS, - hiddens=[64, 64], - hidden_activation=["relu", "relu"], - output_shape=Shape.ACTIONS, - output_activation=None, - output_scale=1.0)} +models_dqn = {} +models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) +models_dqn["target_q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_dqn.values(): @@ -58,7 +59,6 @@ # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#configuration-and-hyperparameters cfg_dqn = DQN_DEFAULT_CONFIG.copy() -cfg_dqn["random_timesteps"] = 0 cfg_dqn["learning_starts"] = 100 cfg_dqn["exploration"]["final_epsilon"] = 0.04 
cfg_dqn["exploration"]["timesteps"] = 1500 diff --git a/docs/source/examples/gym/gym_cartpole_dqn_eval.py b/docs/source/examples/gym/gym_cartpole_dqn_eval.py index 6360b1c7..1cd9c90a 100644 --- a/docs/source/examples/gym/gym_cartpole_dqn_eval.py +++ b/docs/source/examples/gym/gym_cartpole_dqn_eval.py @@ -23,26 +23,23 @@ # Instantiate only the policy for evaluation. # DQN requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models -models_dqn = {"q_network": deterministic_model(observation_space=env.observation_space, - action_space=env.action_space, - device=device, - clip_actions=False, - input_shape=Shape.OBSERVATIONS, - hiddens=[64, 64], - hidden_activation=["relu", "relu"], - output_shape=Shape.ACTIONS, - output_activation=None, - output_scale=1.0)} - -# load checkpoint -models_dqn["q_network"].load("./runs/22-02-06_19-19-56-857355_DQN/checkpoints/15000_q_network.pt") +models_dqn = {} +models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) # Configure and instantiate the agent. # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#configuration-and-hyperparameters cfg_dqn = DQN_DEFAULT_CONFIG.copy() -cfg_dqn["random_timesteps"] = 0 cfg_dqn["exploration"]["timesteps"] = 0 # # logging to TensorBoard each 1000 timesteps and ignore checkpoints cfg_dqn["experiment"]["write_interval"] = 1000 @@ -55,6 +52,9 @@ action_space=env.action_space, device=device) +# load checkpoint +agent_dqn.load("./runs/22-09-10_10-48-10-551426_DQN/checkpoints/best_agent.pt") + # Configure and instantiate the RL trainer cfg_trainer = {"timesteps": 50000, "headless": True} diff --git a/docs/source/examples/gym/gym_frozen_lake_q_learning.py b/docs/source/examples/gym/gym_frozen_lake_q_learning.py index 822cdbda..c3141f38 100644 --- a/docs/source/examples/gym/gym_frozen_lake_q_learning.py +++ b/docs/source/examples/gym/gym_frozen_lake_q_learning.py @@ -3,21 +3,23 @@ import torch # Import the skrl components to build the RL system -from skrl.models.torch import TabularModel +from skrl.models.torch import Model, TabularMixin from skrl.agents.torch.q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -# Define the model (tabular models) for the Q-learning agent using a helper class -class EpilonGreedyPolicy(TabularModel): +# Define the model (tabular model) for the SARSA agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): - super().__init__(observation_space, action_space, device, num_envs) + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): actions = 
torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) @@ -44,21 +46,19 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (table) # Q-learning requires 1 model, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#spaces-and-models -models_q_learning = {"policy": EpilonGreedyPolicy(env.observation_space, env.action_space, device, \ - num_envs=env.num_envs, epsilon=0.1)} +models_q_learning = {} +models_q_learning["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) # Configure and instantiate the agent. # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#configuration-and-hyperparameters cfg_q_learning = Q_LEARNING_DEFAULT_CONFIG.copy() -cfg_q_learning["random_timesteps"] = 0 -cfg_q_learning["learning_starts"] = 0 cfg_q_learning["discount_factor"] = 0.999 cfg_q_learning["alpha"] = 0.4 -# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively -cfg_q_learning["experiment"]["write_interval"] = 1000 -cfg_q_learning["experiment"]["checkpoint_interval"] = 5000 +# logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively +cfg_q_learning["experiment"]["write_interval"] = 1600 +cfg_q_learning["experiment"]["checkpoint_interval"] = 8000 agent_q_learning = Q_LEARNING(models=models_q_learning, memory=None, diff --git a/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py index c600516f..bfdc9452 100644 --- a/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py +++ b/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py @@ -3,21 +3,23 @@ import torch # Import the skrl components to build the RL system -from skrl.models.torch import TabularModel +from skrl.models.torch import Model, TabularMixin from skrl.agents.torch.q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -# Define the model (tabular models) for the Q-learning agent using a helper class -class EpilonGreedyPolicy(TabularModel): +# Define the model (tabular model) for the Q-learning agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): - super().__init__(observation_space, action_space, device, num_envs) + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) @@ -44,11 +46,8 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (table) # Q-learning requires 1 model, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#spaces-and-models -models_q_learning = {"policy": 
EpilonGreedyPolicy(env.observation_space, env.action_space, device, \ - num_envs=env.num_envs, epsilon=0.1)} - -# load checkpoint -models_q_learning["policy"].load("./runs/22-03-09_12-09-36-143036_Q_LEARNING/checkpoints/60000_policy.pt") +models_q_learning = {} +models_q_learning["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) # Configure and instantiate the agent. @@ -56,8 +55,8 @@ def compute(self, states, taken_actions): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#configuration-and-hyperparameters cfg_q_learning = Q_LEARNING_DEFAULT_CONFIG.copy() cfg_q_learning["random_timesteps"] = 0 -# logging to TensorBoard and write checkpoints each 1000 and ignore checkpoints -cfg_q_learning["experiment"]["write_interval"] = 1000 +# logging to TensorBoard and write checkpoints each 1600 and ignore checkpoints +cfg_q_learning["experiment"]["write_interval"] = 1600 cfg_q_learning["experiment"]["checkpoint_interval"] = 0 agent_q_learning = Q_LEARNING(models=models_q_learning, @@ -67,6 +66,9 @@ def compute(self, states, taken_actions): action_space=env.action_space, device=device) +# load checkpoint +agent_q_learning.load("./runs/22-09-10_17-54-20-381109_Q_LEARNING/checkpoints/best_agent.pt") + # Configure and instantiate the RL trainer cfg_trainer = {"timesteps": 80000, "headless": True} diff --git a/docs/source/examples/gym/gym_pendulum_ddpg.py b/docs/source/examples/gym/gym_pendulum_ddpg.py index 391472ec..7ac71623 100644 --- a/docs/source/examples/gym/gym_pendulum_ddpg.py +++ b/docs/source/examples/gym/gym_pendulum_ddpg.py @@ -5,7 +5,7 @@ import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import DeterministicModel +from skrl.models.torch import Model, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise @@ -13,35 +13,36 @@ from skrl.envs.torch import wrap_env -# Define the models (deterministic models) for the DDPG agent using a helper class -# and programming with two approaches (layer by layer and torch.nn.Sequential class). 
+# Define the models (deterministic models) for the DDPG agent using mixin # - Actor (policy): takes as input the environment's observation/state and returns an action # - Critic: takes the state and action as input and provides a value to guide the policy -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.linear_layer_1 = nn.Linear(self.num_observations, 400) self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): x = F.relu(self.linear_layer_1(states)) x = F.relu(self.linear_layer_2(x)) return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 -class DeterministicCritic(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicCritic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) - self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 400), - nn.ReLU(), - nn.Linear(400, 300), - nn.ReLU(), - nn.Linear(300, 1)) + self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) - def compute(self, states, taken_actions): - return self.net(torch.cat([states, taken_actions], dim=1)) + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(torch.cat([states, taken_actions], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x) # Load and wrap the Gym environment. @@ -64,10 +65,11 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models -models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "target_policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic": DeterministicCritic(env.observation_space, env.action_space, device), - "target_critic": DeterministicCritic(env.observation_space, env.action_space, device)} +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["critic"] = DeterministicCritic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = DeterministicCritic(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ddpg.values(): @@ -82,9 +84,9 @@ def compute(self, states, taken_actions): cfg_ddpg["batch_size"] = 100 cfg_ddpg["random_timesteps"] = 100 cfg_ddpg["learning_starts"] = 100 -# logging to TensorBoard and write checkpoints each 1000 and 1000 timesteps respectively -cfg_ddpg["experiment"]["write_interval"] = 1000 -cfg_ddpg["experiment"]["checkpoint_interval"] = 1000 +# logging to TensorBoard and write checkpoints each 300 and 1500 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 300 +cfg_ddpg["experiment"]["checkpoint_interval"] = 1500 agent_ddpg = DDPG(models=models_ddpg, memory=memory, diff --git a/docs/source/examples/gym/gym_pendulum_ddpg_eval.py b/docs/source/examples/gym/gym_pendulum_ddpg_eval.py index 7b2628ea..350cc85c 100644 --- a/docs/source/examples/gym/gym_pendulum_ddpg_eval.py +++ b/docs/source/examples/gym/gym_pendulum_ddpg_eval.py @@ -5,22 +5,23 @@ import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import DeterministicModel +from skrl.models.torch import Model, DeterministicMixin from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env # Define only the policy for evaluation -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.linear_layer_1 = nn.Linear(self.num_observations, 400) self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): x = F.relu(self.linear_layer_1(states)) x = F.relu(self.linear_layer_2(x)) return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 @@ -42,10 +43,8 @@ def compute(self, states, taken_actions): # Instantiate the agent's policy. 
# DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models -models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True)} - -# load checkpoint -models_ddpg["policy"].load("./runs/22-02-06_19-23-31-556859_DDPG/checkpoints/36000_policy.pt") +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device) # Configure and instantiate the agent. @@ -53,8 +52,8 @@ def compute(self, states, taken_actions): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() cfg_ddpg["random_timesteps"] = 0 -# logging to TensorBoard each 1000 timesteps and ignore checkpoints -cfg_ddpg["experiment"]["write_interval"] = 1000 +# logging to TensorBoard each 300 timesteps and ignore checkpoints +cfg_ddpg["experiment"]["write_interval"] = 300 cfg_ddpg["experiment"]["checkpoint_interval"] = 0 agent_ddpg = DDPG(models=models_ddpg, @@ -64,6 +63,9 @@ def compute(self, states, taken_actions): action_space=env.action_space, device=device) +# load checkpoint +agent_ddpg.load("./runs/22-09-10_11-02-46-773796_DDPG/checkpoints/agent_15000.pt") + # Configure and instantiate the RL trainer cfg_trainer = {"timesteps": 15000, "headless": True} diff --git a/docs/source/examples/gym/gym_taxi_sarsa.py b/docs/source/examples/gym/gym_taxi_sarsa.py index 99f43f12..5f7ba8dd 100644 --- a/docs/source/examples/gym/gym_taxi_sarsa.py +++ b/docs/source/examples/gym/gym_taxi_sarsa.py @@ -3,21 +3,23 @@ import torch # Import the skrl components to build the RL system -from skrl.models.torch import TabularModel +from skrl.models.torch import Model, TabularMixin from skrl.agents.torch.sarsa import SARSA, SARSA_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -# Define the model (tabular models) for the SARSA agent using a helper class -class EpilonGreedyPolicy(TabularModel): +# Define the model (tabular model) for the SARSA agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): - super().__init__(observation_space, action_space, device, num_envs) + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) @@ -44,21 +46,19 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (table) # SARSA requires 1 model, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#spaces-and-models -models_sarsa = {"policy": EpilonGreedyPolicy(env.observation_space, env.action_space, device, \ - num_envs=env.num_envs, epsilon=0.1)} +models_sarsa = {} +models_sarsa["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) # Configure and instantiate the agent. 
# Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#configuration-and-hyperparameters cfg_sarsa = SARSA_DEFAULT_CONFIG.copy() -cfg_sarsa["random_timesteps"] = 0 -cfg_sarsa["learning_starts"] = 0 cfg_sarsa["discount_factor"] = 0.999 cfg_sarsa["alpha"] = 0.4 -# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively -cfg_sarsa["experiment"]["write_interval"] = 1000 -cfg_sarsa["experiment"]["checkpoint_interval"] = 5000 +# logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively +cfg_sarsa["experiment"]["write_interval"] = 1600 +cfg_sarsa["experiment"]["checkpoint_interval"] = 8000 agent_sarsa = SARSA(models=models_sarsa, memory=None, diff --git a/docs/source/examples/gym/gym_taxi_sarsa_eval.py b/docs/source/examples/gym/gym_taxi_sarsa_eval.py index 6cc8232d..4cd64ac3 100644 --- a/docs/source/examples/gym/gym_taxi_sarsa_eval.py +++ b/docs/source/examples/gym/gym_taxi_sarsa_eval.py @@ -3,21 +3,23 @@ import torch # Import the skrl components to build the RL system -from skrl.models.torch import TabularModel +from skrl.models.torch import Model, TabularMixin from skrl.agents.torch.sarsa import SARSA, SARSA_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env # Define the model (tabular models) for the SARSA agent using a helper class -class EpilonGreedyPolicy(TabularModel): +class EpilonGreedyPolicy(TabularMixin, Model): def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): - super().__init__(observation_space, action_space, device, num_envs) + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) @@ -44,11 +46,8 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (table) # SARSA requires 1 model, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#spaces-and-models -models_sarsa = {"policy": EpilonGreedyPolicy(env.observation_space, env.action_space, device, \ - num_envs=env.num_envs, epsilon=0.1)} - -# load checkpoint -models_sarsa["policy"].load("./runs/22-03-09_12-20-00-224006_SARSA/checkpoints/80000_policy.pt") +models_sarsa = {} +models_sarsa["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) # Configure and instantiate the agent. 
@@ -56,8 +55,8 @@ def compute(self, states, taken_actions): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#configuration-and-hyperparameters cfg_sarsa = SARSA_DEFAULT_CONFIG.copy() cfg_sarsa["random_timesteps"] = 0 -# logging to TensorBoard and write checkpoints each 1000 and ignore checkpoints -cfg_sarsa["experiment"]["write_interval"] = 1000 +# logging to TensorBoard and write checkpoints each 1600 and ignore checkpoints +cfg_sarsa["experiment"]["write_interval"] = 1600 cfg_sarsa["experiment"]["checkpoint_interval"] = 0 agent_sarsa = SARSA(models=models_sarsa, @@ -67,6 +66,9 @@ def compute(self, states, taken_actions): action_space=env.action_space, device=device) +# load checkpoint +agent_sarsa.load("./runs/22-09-10_13-13-41-011999_SARSA/checkpoints/agent_80000.pt") + # Configure and instantiate the RL trainer cfg_trainer = {"timesteps": 80000, "headless": True} diff --git a/docs/source/examples/gym/gym_vector_cartpole_dqn.py b/docs/source/examples/gym/gym_vector_cartpole_dqn.py index 1d737299..8e1ea656 100644 --- a/docs/source/examples/gym/gym_vector_cartpole_dqn.py +++ b/docs/source/examples/gym/gym_vector_cartpole_dqn.py @@ -22,32 +22,33 @@ # Instantiate a RandomMemory (without replacement) as experience replay memory -memory = RandomMemory(memory_size=100000, num_envs=env.num_envs, device=device, replacement=False) +memory = RandomMemory(memory_size=200000, num_envs=env.num_envs, device=device, replacement=False) # Instantiate the agent's models (function approximators) using the model instantiator utility # DQN requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models -models_dqn = {"q_network": deterministic_model(observation_space=env.observation_space, - action_space=env.action_space, - device=device, - clip_actions=False, - input_shape=Shape.OBSERVATIONS, - hiddens=[64, 64], - hidden_activation=["relu", "relu"], - output_shape=Shape.ACTIONS, - output_activation=None, - output_scale=1.0), - "target_q_network": deterministic_model(observation_space=env.observation_space, - action_space=env.action_space, - device=device, - clip_actions=False, - input_shape=Shape.OBSERVATIONS, - hiddens=[64, 64], - hidden_activation=["relu", "relu"], - output_shape=Shape.ACTIONS, - output_activation=None, - output_scale=1.0)} +models_dqn = {} +models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) +models_dqn["target_q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_dqn.values(): @@ -58,7 +59,6 @@ # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#configuration-and-hyperparameters cfg_dqn = DQN_DEFAULT_CONFIG.copy() -cfg_dqn["random_timesteps"] = 0 cfg_dqn["learning_starts"] = 100 cfg_dqn["exploration"]["final_epsilon"] = 0.04 
cfg_dqn["exploration"]["timesteps"] = 1500 diff --git a/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py b/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py index 04b6d827..efc912be 100644 --- a/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py +++ b/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py @@ -3,21 +3,23 @@ import torch # Import the skrl components to build the RL system -from skrl.models.torch import TabularModel +from skrl.models.torch import Model, TabularMixin from skrl.agents.torch.q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -# Define the model (tabular models) for the Q-learning agent using a helper class -class EpilonGreedyPolicy(TabularModel): +# Define the model (tabular model) for the SARSA agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): - super().__init__(observation_space, action_space, device, num_envs) + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) @@ -31,40 +33,32 @@ def compute(self, states, taken_actions): # Load and wrap the Gym environment. # Note: the environment version may change depending on the gym version try: - env = gym.vector.make("FrozenLake-v0", num_envs=5, asynchronous=False) + env = gym.vector.make("FrozenLake-v0", num_envs=10, asynchronous=False) except gym.error.DeprecatedEnv as e: env_id = [spec.id for spec in gym.envs.registry.all() if spec.id.startswith("FrozenLake-v")][0] print("FrozenLake-v0 not found. Trying {}".format(env_id)) - env = gym.vector.make(env_id, num_envs=5, asynchronous=False) + env = gym.vector.make(env_id, num_envs=10, asynchronous=False) env = wrap_env(env) -print(env.num_envs) -print(env.action_space) -print(env.observation_space) -print(env._env.action_space) -print(env._env.observation_space) -# exit() device = env.device # Instantiate the agent's models (table) # Q-learning requires 1 model, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#spaces-and-models -models_q_learning = {"policy": EpilonGreedyPolicy(env.observation_space, env.action_space, device, \ - num_envs=env.num_envs, epsilon=0.1)} +models_q_learning = {} +models_q_learning["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) # Configure and instantiate the agent. 
# Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#configuration-and-hyperparameters cfg_q_learning = Q_LEARNING_DEFAULT_CONFIG.copy() -cfg_q_learning["random_timesteps"] = 0 -cfg_q_learning["learning_starts"] = 0 cfg_q_learning["discount_factor"] = 0.999 cfg_q_learning["alpha"] = 0.4 -# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively -cfg_q_learning["experiment"]["write_interval"] = 1000 -cfg_q_learning["experiment"]["checkpoint_interval"] = 5000 +# logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively +cfg_q_learning["experiment"]["write_interval"] = 1600 +cfg_q_learning["experiment"]["checkpoint_interval"] = 8000 agent_q_learning = Q_LEARNING(models=models_q_learning, memory=None, diff --git a/docs/source/examples/gym/gym_vector_pendulum_ddpg.py b/docs/source/examples/gym/gym_vector_pendulum_ddpg.py index 9580340a..c6bf729f 100644 --- a/docs/source/examples/gym/gym_vector_pendulum_ddpg.py +++ b/docs/source/examples/gym/gym_vector_pendulum_ddpg.py @@ -5,7 +5,7 @@ import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import DeterministicModel +from skrl.models.torch import Model, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise @@ -13,35 +13,36 @@ from skrl.envs.torch import wrap_env -# Define the models (deterministic models) for the DDPG agent using a helper class -# and programming with two approaches (layer by layer and torch.nn.Sequential class). +# Define the models (deterministic models) for the DDPG agent using mixin # - Actor (policy): takes as input the environment's observation/state and returns an action # - Critic: takes the state and action as input and provides a value to guide the policy -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.linear_layer_1 = nn.Linear(self.num_observations, 400) self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): x = F.relu(self.linear_layer_1(states)) x = F.relu(self.linear_layer_2(x)) return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 -class DeterministicCritic(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicCritic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) - self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 400), - nn.ReLU(), - nn.Linear(400, 300), - nn.ReLU(), - nn.Linear(300, 1)) + self.linear_layer_1 = nn.Linear(self.num_observations + 
self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) - def compute(self, states, taken_actions): - return self.net(torch.cat([states, taken_actions], dim=1)) + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(torch.cat([states, taken_actions], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x) # Load and wrap the Gym environment. @@ -58,16 +59,17 @@ def compute(self, states, taken_actions): # Instantiate a RandomMemory (without replacement) as experience replay memory -memory = RandomMemory(memory_size=15000, num_envs=env.num_envs, device=device, replacement=False) +memory = RandomMemory(memory_size=100000, num_envs=env.num_envs, device=device, replacement=False) # Instantiate the agent's models (function approximators). # DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models -models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "target_policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic": DeterministicCritic(env.observation_space, env.action_space, device), - "target_critic": DeterministicCritic(env.observation_space, env.action_space, device)} +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["critic"] = DeterministicCritic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = DeterministicCritic(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ddpg.values(): diff --git a/docs/source/examples/gym/gym_vector_taxi_sarsa.py b/docs/source/examples/gym/gym_vector_taxi_sarsa.py index e12ce3eb..581489b2 100644 --- a/docs/source/examples/gym/gym_vector_taxi_sarsa.py +++ b/docs/source/examples/gym/gym_vector_taxi_sarsa.py @@ -3,21 +3,23 @@ import torch # Import the skrl components to build the RL system -from skrl.models.torch import TabularModel +from skrl.models.torch import Model, TabularMixin from skrl.agents.torch.sarsa import SARSA, SARSA_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -# Define the model (tabular models) for the SARSA agent using a helper class -class EpilonGreedyPolicy(TabularModel): +# Define the model (tabular model) for the SARSA agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): - super().__init__(observation_space, action_space, device, num_envs) + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) @@ -44,21 +46,19 @@ def compute(self, states, taken_actions): # 
Instantiate the agent's models (table) # SARSA requires 1 model, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#spaces-and-models -models_sarsa = {"policy": EpilonGreedyPolicy(env.observation_space, env.action_space, device, \ - num_envs=env.num_envs, epsilon=0.1)} +models_sarsa = {} +models_sarsa["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) # Configure and instantiate the agent. # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#configuration-and-hyperparameters cfg_sarsa = SARSA_DEFAULT_CONFIG.copy() -cfg_sarsa["random_timesteps"] = 0 -cfg_sarsa["learning_starts"] = 0 cfg_sarsa["discount_factor"] = 0.999 cfg_sarsa["alpha"] = 0.4 -# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively -cfg_sarsa["experiment"]["write_interval"] = 1000 -cfg_sarsa["experiment"]["checkpoint_interval"] = 5000 +# logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively +cfg_sarsa["experiment"]["write_interval"] = 1600 +cfg_sarsa["experiment"]["checkpoint_interval"] = 8000 agent_sarsa = SARSA(models=models_sarsa, memory=None, From a2c66f7f3baf2d815bfb5b8076cfe005d835c606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 10 Sep 2022 23:50:16 +0200 Subject: [PATCH 066/108] Update OpenAI Gym examples in docs --- docs/source/intro/examples.rst | 144 ++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 57 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 02adee87..ee9de042 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -12,10 +12,10 @@ Examples
-Learning in a Gym environment (one agent, one environment) ----------------------------------------------------------- +Learning in an OpenAI Gym environment +------------------------------------- -This example performs the training of one agent in an OpenAI Gym environment +These examples perform the training of one agent in an OpenAI Gym environment (**one agent, one environment**) .. image:: ../_static/imgs/example_gym.png :width: 100% @@ -28,10 +28,10 @@ This example performs the training of one agent in an OpenAI Gym environment The following components or practices are exemplified (highlighted): - - Load and wrap an OpenAI Gym environment: **Pendulum (DDPG)** + - Load and wrap an OpenAI Gym environment: **Pendulum (DDPG)**, **CartPole (CEM)** - Instantiate models using the model instantiation utility: **CartPole (DQN)** - Create a tabular model (:math:`\epsilon`-greedy policy): **Taxi (SARSA)**, **FrozenLake (Q-Learning)** - - Load a checkpoint during evaluation: **Pendulum (DDPG)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** + - Load a checkpoint during evaluation: **Pendulum (DDPG)**, **CartPole (CEM)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** .. tabs:: @@ -39,106 +39,132 @@ The following components or practices are exemplified (highlighted): .. tabs:: - .. tab:: Training + .. group-tab:: Training - View the raw code: `gym_pendulum_ddpg.py `_ + :download:`gym_pendulum_ddpg.py <../examples/gym/gym_pendulum_ddpg.py>` .. literalinclude:: ../examples/gym/gym_pendulum_ddpg.py :language: python - :linenos: - :emphasize-lines: 1, 13, 49-55, 99 + :emphasize-lines: 1, 13, 50-56 - .. tab:: Evaluation + .. group-tab:: Evaluation - View the raw code: `gym_pendulum_ddpg_eval.py `_ + :download:`gym_pendulum_ddpg_eval.py <../examples/gym/gym_pendulum_ddpg_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments + **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined + .. literalinclude:: ../examples/gym/gym_pendulum_ddpg_eval.py :language: python - :linenos: - :emphasize-lines: 45, 48, 73 + :emphasize-lines: 67 + + .. tab:: CartPole (CEM) + + .. tabs:: + + .. group-tab:: Training + + :download:`gym_cartpole_cem.py <../examples/gym/gym_cartpole_cem.py>` + + .. literalinclude:: ../examples/gym/gym_cartpole_cem.py + :language: python + :emphasize-lines: 1, 11, 33-39 + + .. group-tab:: Evaluation + + :download:`gym_cartpole_cem_eval.py <../examples/gym/gym_cartpole_cem_eval.py>` + + **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments + + **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined + + .. literalinclude:: ../examples/gym/gym_cartpole_cem_eval.py + :language: python + :emphasize-lines: 68 .. tab:: CartPole (DQN) .. tabs:: - .. tab:: Training + .. group-tab:: Training - View the raw code: `gym_cartpole_dqn.py `_ + :download:`gym_cartpole_dqn.py <../examples/gym/gym_cartpole_dqn.py>` .. 
literalinclude:: ../examples/gym/gym_cartpole_dqn.py :language: python - :linenos: - :emphasize-lines: 4, 31-50, 69 + :emphasize-lines: 4, 31-51 - .. tab:: Evaluation + .. group-tab:: Evaluation - View the raw code: `gym_cartpole_dqn_eval.py `_ + :download:`gym_cartpole_dqn_eval.py <../examples/gym/gym_cartpole_dqn_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments + **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined + .. literalinclude:: ../examples/gym/gym_cartpole_dqn_eval.py :language: python - :linenos: - :emphasize-lines: 26-35, 38, 64 + :emphasize-lines: 56 .. tab:: Taxi (SARSA) .. tabs:: - .. tab:: Training + .. group-tab:: Training - View the raw code: `gym_taxi_sarsa.py `_ + :download:`gym_taxi_sarsa.py <../examples/gym/gym_taxi_sarsa.py>` .. literalinclude:: ../examples/gym/gym_taxi_sarsa.py :language: python - :linenos: - :emphasize-lines: 6, 13-28 + :emphasize-lines: 6, 13-30 - .. tab:: Evaluation + .. group-tab:: Evaluation - View the raw code: `gym_taxi_sarsa_eval.py `_ + :download:`gym_taxi_sarsa_eval.py <../examples/gym/gym_taxi_sarsa_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments + **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined + .. literalinclude:: ../examples/gym/gym_taxi_sarsa_eval.py :language: python - :linenos: - :emphasize-lines: 47-48, 51, 76 + :emphasize-lines: 70 .. tab:: FrozenLake (Q-learning) .. tabs:: - .. tab:: Training + .. group-tab:: Training - View the raw code: `gym_frozen_lake_q_learning.py `_ + :download:`gym_frozen_lake_q_learning.py <../examples/gym/gym_frozen_lake_q_learning.py>` .. literalinclude:: ../examples/gym/gym_frozen_lake_q_learning.py :language: python - :linenos: - :emphasize-lines: 6, 13-28 + :emphasize-lines: 6, 13-30 - .. tab:: Evaluation + .. group-tab:: Evaluation - View the raw code: `gym_frozen_lake_q_learning_eval.py `_ + :download:`gym_frozen_lake_q_learning_eval.py <../examples/gym/gym_frozen_lake_q_learning_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments + **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined + .. literalinclude:: ../examples/gym/gym_frozen_lake_q_learning_eval.py :language: python - :linenos: - :emphasize-lines: 47-48, 51, 76 + :emphasize-lines: 70 .. raw:: html
-Learning in a Gym vectorized environment (one agent, multiple environments) ---------------------------------------------------------------------------- +Learning in an OpenAI Gym vectorized environment +------------------------------------------------ -This example performs the training of one agent in an OpenAI Gym vectorized environment (multiple independent copies of the same environment in parallel). The following components or practices are exemplified (highlighted): +These examples perform the training of one agent in an OpenAI Gym vectorized environment (**one agent, multiple independent copies of the same environment in parallel**) + +The following components or practices are exemplified (highlighted): - Load and wrap an OpenAI Gym vectorized environment: **Pendulum (DDPG)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** @@ -148,53 +174,49 @@ This example performs the training of one agent in an OpenAI Gym vectorized envi .. tabs:: - .. tab:: Training + .. group-tab:: Training - View the raw code: `gym_vector_pendulum_ddpg.py `_ + :download:`gym_vector_pendulum_ddpg.py <../examples/gym/gym_vector_pendulum_ddpg.py>` .. literalinclude:: ../examples/gym/gym_vector_pendulum_ddpg.py :language: python - :linenos: - :emphasize-lines: 1, 13, 49-55 + :emphasize-lines: 1, 13, 50-56 .. tab:: CartPole (DQN) .. tabs:: - .. tab:: Training + .. group-tab:: Training - View the raw code: `gym_vector_cartpole_dqn.py `_ + :download:`gym_vector_cartpole_dqn.py <../examples/gym/gym_vector_cartpole_dqn.py>` .. literalinclude:: ../examples/gym/gym_vector_cartpole_dqn.py :language: python - :linenos: :emphasize-lines: 1, 8, 13-19 .. tab:: Taxi (SARSA) .. tabs:: - .. tab:: Training + .. group-tab:: Training - View the raw code: `gym_vector_taxi_sarsa.py `_ + :download:`gym_vector_taxi_sarsa.py <../examples/gym/gym_vector_taxi_sarsa.py>` .. literalinclude:: ../examples/gym/gym_vector_taxi_sarsa.py :language: python - :linenos: - :emphasize-lines: 1, 9, 33-39 + :emphasize-lines: 1, 9, 35-41 .. tab:: FrozenLake (Q-learning) .. tabs:: - .. tab:: Training + .. group-tab:: Training - View the raw code: `gym_vector_frozen_lake_q_learning.py `_ + :download:`gym_vector_frozen_lake_q_learning.py <../examples/gym/gym_vector_frozen_lake_q_learning.py>` .. literalinclude:: ../examples/gym/gym_vector_frozen_lake_q_learning.py :language: python - :linenos: - :emphasize-lines: 1, 9, 33-39 + :emphasize-lines: 1, 9, 35-41 .. raw:: html @@ -225,7 +247,7 @@ The following components or practices are exemplified (highlighted): .. tabs:: - .. tab:: Training + .. group-tab:: Training View the raw code: `dm_suite_cartpole_swingup_ddpg.py `_ @@ -238,7 +260,7 @@ The following components or practices are exemplified (highlighted): .. tabs:: - .. tab:: Training + .. group-tab:: Training View the raw code: `dm_manipulation_stack_sac.py `_ @@ -445,6 +467,8 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments + **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined + .. 
literalinclude:: ../examples/isaacgym/ppo_cartpole_eval.py :language: python :linenos: @@ -506,6 +530,8 @@ The following components or practices are exemplified (highlighted): **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments + **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined + .. literalinclude:: ../examples/isaacgym/isaacgym_sequential_shared_memory_eval.py :language: python :linenos: @@ -539,6 +565,8 @@ The following components or practices are exemplified (highlighted): **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments + **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined + .. literalinclude:: ../examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py :language: python :linenos: @@ -550,6 +578,8 @@ The following components or practices are exemplified (highlighted): **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments + **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined + .. literalinclude:: ../examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py :language: python :linenos: From 92a8fdeeb435bb6ab1d01891b315373eca47008b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 11 Sep 2022 00:11:21 +0200 Subject: [PATCH 067/108] Fix vectorized Discrete action space conversion --- skrl/envs/torch/wrappers.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index 2ab26175..5f903e58 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -352,13 +352,15 @@ def _tensor_to_action(self, actions: torch.Tensor) -> Any: if isinstance(space, gym.spaces.MultiDiscrete): return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape) elif isinstance(space, gym.spaces.Tuple): - return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(space.shape) + if isinstance(space[0], gym.spaces.Box): + return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(space.shape) + elif isinstance(space[0], gym.spaces.Discrete): + return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(-1) elif isinstance(space, gym.spaces.Discrete): return actions.item() elif isinstance(space, gym.spaces.Box): return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape) - else: - raise ValueError("Action space type {} not supported. Please report this issue".format(type(space))) + raise ValueError("Action space type {} not supported. 
Please report this issue".format(type(space))) def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Any]: """Perform a step in the environment @@ -608,53 +610,53 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: :rtype: Wrapper """ if verbose: - print("[INFO] Environment:", [str(base).replace("", "") \ - for base in env.__class__.__bases__]) + logger.info("Environment class: {}".format(", ".join([str(base).replace("", "") \ + for base in env.__class__.__bases__]))) if wrapper == "auto": base_classes = [str(base) for base in env.__class__.__bases__] if "" in base_classes or \ "" in base_classes: if verbose: - print("[INFO] Wrapper: Omniverse Isaac Gym") + logger.info("Environment wrapper: Omniverse Isaac Gym") return OmniverseIsaacGymWrapper(env) elif isinstance(env, gym.core.Env) or isinstance(env, gym.core.Wrapper): if verbose: - print("[INFO] Wrapper: Gym") + logger.info("Environment wrapper: Gym") return GymWrapper(env) elif "" in base_classes: if verbose: - print("[INFO] Wrapper: DeepMind") + logger.info("Environment wrapper: DeepMind") return DeepMindWrapper(env) elif "" in base_classes: if verbose: - print("[INFO] Wrapper: Isaac Gym (preview 2)") + logger.info("Environment wrapper: Isaac Gym (preview 2)") return IsaacGymPreview2Wrapper(env) if verbose: - print("[INFO] Wrapper: Isaac Gym (preview 3/4)") + logger.info("Environment wrapper: Isaac Gym (preview 3/4)") return IsaacGymPreview3Wrapper(env) # preview 4 is the same as 3 elif wrapper == "gym": if verbose: - print("[INFO] Wrapper: Gym") + logger.info("Environment wrapper: Gym") return GymWrapper(env) elif wrapper == "dm": if verbose: - print("[INFO] Wrapper: DeepMind") + logger.info("Environment wrapper: DeepMind") return DeepMindWrapper(env) elif wrapper == "isaacgym-preview2": if verbose: - print("[INFO] Wrapper: Isaac Gym (preview 2)") + logger.info("Environment wrapper: Isaac Gym (preview 2)") return IsaacGymPreview2Wrapper(env) elif wrapper == "isaacgym-preview3": if verbose: - print("[INFO] Wrapper: Isaac Gym (preview 3)") + logger.info("Environment wrapper: Isaac Gym (preview 3)") return IsaacGymPreview3Wrapper(env) elif wrapper == "isaacgym-preview4": if verbose: - print("[INFO] Wrapper: Isaac Gym (preview 4)") + logger.info("Environment wrapper: Isaac Gym (preview 4)") return IsaacGymPreview3Wrapper(env) # preview 4 is the same as 3 elif wrapper == "omniverse-isaacgym": if verbose: - print("[INFO] Wrapper: Omniverse Isaac Gym") + logger.info("Environment wrapper: Omniverse Isaac Gym") return OmniverseIsaacGymWrapper(env) else: raise ValueError("Unknown {} wrapper type".format(wrapper)) From 37c08cd4e3f097d5bbc0167bf05611218241d784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 11 Sep 2022 16:49:13 +0200 Subject: [PATCH 068/108] Update Omniverse Isaac Gym examples --- .../examples/omniisaacgym/ppo_allegro_hand.py | 26 ++++++++++--------- docs/source/examples/omniisaacgym/ppo_ant.py | 26 ++++++++++--------- .../examples/omniisaacgym/ppo_ant_mt.py | 26 ++++++++++--------- .../examples/omniisaacgym/ppo_cartpole.py | 26 ++++++++++--------- .../examples/omniisaacgym/ppo_cartpole_mt.py | 26 ++++++++++--------- .../examples/omniisaacgym/ppo_humanoid.py | 26 ++++++++++--------- .../examples/omniisaacgym/ppo_shadow_hand.py | 26 ++++++++++--------- 7 files changed, 98 insertions(+), 84 deletions(-) diff --git a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py index 
02e06628..8b3f589f 100644 --- a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py @@ -2,7 +2,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -17,14 +17,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. # - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -35,12 +35,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -50,7 +51,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -68,8 +69,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
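Editorial note: throughout these updated examples, compute() gains a trailing role argument. The examples keep separate Policy and Value classes, but the argument also enables a single network to serve both roles. A hedged sketch of that use, assuming the mixins accept a role keyword as documented for later skrl versions (class and layer names are illustrative):

# Hypothetical shared policy/value model dispatching on the new "role" argument
import torch
import torch.nn as nn
from skrl.models.torch import Model, GaussianMixin, DeterministicMixin

class SharedModel(GaussianMixin, DeterministicMixin, Model):
    def __init__(self, observation_space, action_space, device):
        Model.__init__(self, observation_space, action_space, device)
        GaussianMixin.__init__(self, clip_actions=False, role="policy")      # assumed role keyword
        DeterministicMixin.__init__(self, clip_actions=False, role="value")  # assumed role keyword

        self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU())
        self.mean_layer = nn.Linear(256, self.num_actions)
        self.value_layer = nn.Linear(256, 1)
        self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions))

    def compute(self, states, taken_actions, role):
        if role == "policy":
            return self.mean_layer(self.net(states)), self.log_std_parameter
        return self.value_layer(self.net(states))  # role == "value"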
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -80,7 +82,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 8 +cfg_ppo["rollouts"] = 8 # memory_size cfg_ppo["learning_epochs"] = 5 cfg_ppo["mini_batches"] = 4 # 8 * 16384 / 32768 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/omniisaacgym/ppo_ant.py b/docs/source/examples/omniisaacgym/ppo_ant.py index dc446032..e6482534 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant.py +++ b/docs/source/examples/omniisaacgym/ppo_ant.py @@ -2,7 +2,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -17,14 +17,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
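Editorial note: the mini_batches settings in these configurations carry comments such as "8 * 16384 / 32768". They encode the relation mini_batches = rollouts * num_envs / desired_batch_size; a tiny helper (not part of skrl, names are hypothetical) makes the arithmetic explicit:

# Illustrative only: how the "rollouts * num_envs / batch_size" comments map to mini_batches
def compute_mini_batches(rollouts: int, num_envs: int, batch_size: int) -> int:
    total_transitions = rollouts * num_envs  # transitions gathered per PPO update
    assert total_transitions % batch_size == 0, "batch size must evenly divide the rollout"
    return total_transitions // batch_size

# e.g. compute_mini_batches(8, 16384, 32768) == 4, matching "# 8 * 16384 / 32768"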
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -35,12 +35,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -50,7 +51,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(64, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -68,8 +69,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -80,7 +82,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 16 +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 4 cfg_ppo["mini_batches"] = 2 # 16 * 4096 / 32768 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/omniisaacgym/ppo_ant_mt.py b/docs/source/examples/omniisaacgym/ppo_ant_mt.py index e576af3b..6072cfc0 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_ant_mt.py @@ -4,7 +4,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -52,7 +53,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(64, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -70,8 +71,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
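Editorial note: these scripts keep the KLAdaptiveRL learning-rate scheduler among their imports. As a rough sketch of the underlying idea (the rule below is the common rl_games-style heuristic with placeholder thresholds, not skrl's exact implementation), the learning rate is reduced when the measured KL divergence overshoots a target and raised when it undershoots:

# Hedged sketch of a KL-adaptive learning-rate rule (illustrative, not skrl's code)
def kl_adaptive_lr(lr: float, kl: float, kl_threshold: float = 0.008,
                   factor: float = 1.5, lr_min: float = 1e-6, lr_max: float = 1e-2) -> float:
    if kl > kl_threshold * 2.0:    # policy moved too far: slow down
        lr = max(lr / factor, lr_min)
    elif kl < kl_threshold * 0.5:  # policy barely moved: speed up
        lr = min(lr * factor, lr_max)
    return lr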
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -82,7 +84,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 16 +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 4 cfg_ppo["mini_batches"] = 2 # 16 * 4096 / 32768 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole.py b/docs/source/examples/omniisaacgym/ppo_cartpole.py index e14aadd5..248ffd0e 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole.py @@ -2,7 +2,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -17,14 +17,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), @@ -33,12 +33,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), @@ -46,7 +47,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -64,8 +65,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -76,7 +78,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 16 +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 8 cfg_ppo["mini_batches"] = 1 # 16 * 512 / 8192 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py index 3672d791..8cbb23cd 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py @@ -4,7 +4,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
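Editorial note: the "# memory_size" comments added next to cfg_ppo["rollouts"] point out that the rollouts value has to match the capacity of the rollout memory created for the agent. A minimal sketch of that pairing, following the usual skrl pattern (variable values and the env/device references are placeholders; exact arguments may differ from the full example files):

# Illustrative pairing of rollouts and memory size (placeholder values)
from skrl.memories.torch import RandomMemory
from skrl.agents.torch.ppo import PPO_DEFAULT_CONFIG

rollouts = 16
memory = RandomMemory(memory_size=rollouts, num_envs=env.num_envs, device=device)

cfg_ppo = PPO_DEFAULT_CONFIG.copy()
cfg_ppo["rollouts"] = rollouts  # must equal the memory_size used above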
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), @@ -35,12 +35,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), @@ -48,7 +49,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -66,8 +67,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -78,7 +80,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 16 +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 8 cfg_ppo["mini_batches"] = 1 # 16 * 512 / 8192 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/omniisaacgym/ppo_humanoid.py b/docs/source/examples/omniisaacgym/ppo_humanoid.py index 9fd60330..29d57b6d 100644 --- a/docs/source/examples/omniisaacgym/ppo_humanoid.py +++ b/docs/source/examples/omniisaacgym/ppo_humanoid.py @@ -2,7 +2,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -17,14 +17,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 400), nn.ELU(), @@ -35,12 +35,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(100, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 400), nn.ELU(), @@ -50,7 +51,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(100, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -68,8 +69,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
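Editorial note: right after building the model dictionary, each script initializes the parameters "using a Gaussian distribution". For readers who want equivalent behaviour without skrl's helpers, a plain-PyTorch sketch (the mean/std values are assumptions, not the examples' exact settings):

# Illustrative Gaussian initialization of linear layers (plain PyTorch, not skrl's helper)
import torch.nn as nn

def gaussian_init_(module: nn.Module, mean: float = 0.0, std: float = 0.1) -> None:
    if isinstance(module, nn.Linear):
        nn.init.normal_(module.weight, mean=mean, std=std)
        nn.init.zeros_(module.bias)

# e.g. for model in models_ppo.values(): model.apply(gaussian_init_)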
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -80,7 +82,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 32 +cfg_ppo["rollouts"] = 32 # memory_size cfg_ppo["learning_epochs"] = 5 cfg_ppo["mini_batches"] = 4 # 32 * 4096 / 32768 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py index e7b66041..1d1e9e11 100644 --- a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py @@ -2,7 +2,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -17,14 +17,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -54,7 +55,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -72,8 +73,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -84,7 +86,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 8 +cfg_ppo["rollouts"] = 8 # memory_size cfg_ppo["learning_epochs"] = 5 cfg_ppo["mini_batches"] = 4 # 8 * 16384 / 32768 cfg_ppo["discount_factor"] = 0.99 From a52a698f9f7b50ae4743f2f240520f583b4ea563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 12 Sep 2022 19:04:46 +0200 Subject: [PATCH 069/108] Fix tensor dimension when computing parallel variance --- .../resources/preprocessors/torch/running_standard_scaler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skrl/resources/preprocessors/torch/running_standard_scaler.py b/skrl/resources/preprocessors/torch/running_standard_scaler.py index 709bc05f..12c8fe47 100644 --- a/skrl/resources/preprocessors/torch/running_standard_scaler.py +++ b/skrl/resources/preprocessors/torch/running_standard_scaler.py @@ -101,7 +101,10 @@ def _compute(self, x: torch.Tensor, train: bool = False, inverse: bool = False) :type inverse: bool, optional """ if train: - self._parallel_variance(torch.mean(x, dim=0), torch.var(x, dim=0), x.shape[0]) + if x.dim() == 3: + self._parallel_variance(torch.mean(x, dim=(0,1)), torch.var(x, dim=(0,1)), x.shape[0] * x.shape[1]) + else: + self._parallel_variance(torch.mean(x, dim=0), torch.var(x, dim=0), x.shape[0]) # scale back the data to the original representation if inverse: From a447e86c43ce1dca2a5d906f43b8b1c50664a43d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 12 Sep 2022 23:03:11 +0200 Subject: [PATCH 070/108] Update Isaac Gym examples --- docs/source/examples/isaacgym/amp_humanoid.py | 37 ++++--- .../isaacgym_parallel_no_shared_memory.py | 101 +++++++++--------- ...isaacgym_parallel_no_shared_memory_eval.py | 90 ++++++---------- .../isaacgym_sequential_no_shared_memory.py | 101 +++++++++--------- ...aacgym_sequential_no_shared_memory_eval.py | 89 ++++++++------- .../isaacgym_sequential_shared_memory.py | 101 +++++++++--------- .../isaacgym_sequential_shared_memory_eval.py | 89 ++++++++------- .../examples/isaacgym/ppo_allegro_hand.py | 26 ++--- docs/source/examples/isaacgym/ppo_ant.py | 38 +++---- docs/source/examples/isaacgym/ppo_anymal.py | 38 +++---- .../examples/isaacgym/ppo_anymal_terrain.py | 38 +++---- .../examples/isaacgym/ppo_ball_balance.py | 38 +++---- docs/source/examples/isaacgym/ppo_cartpole.py | 38 +++---- .../examples/isaacgym/ppo_cartpole_eval.py | 51 +++++---- .../examples/isaacgym/ppo_franka_cabinet.py | 38 +++---- docs/source/examples/isaacgym/ppo_humanoid.py | 38 +++---- .../source/examples/isaacgym/ppo_ingenuity.py | 26 ++--- .../examples/isaacgym/ppo_quadcopter.py | 38 +++---- 
.../examples/isaacgym/ppo_shadow_hand.py | 38 +++---- .../source/examples/isaacgym/ppo_trifinger.py | 38 +++---- .../source/examples/isaacgym/trpo_cartpole.py | 40 ++++--- 21 files changed, 527 insertions(+), 604 deletions(-) diff --git a/docs/source/examples/isaacgym/amp_humanoid.py b/docs/source/examples/isaacgym/amp_humanoid.py index 8b3c4f05..11a3e580 100644 --- a/docs/source/examples/isaacgym/amp_humanoid.py +++ b/docs/source/examples/isaacgym/amp_humanoid.py @@ -4,7 +4,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.amp import AMP, AMP_DEFAULT_CONFIG from skrl.resources.preprocessors.torch import RunningStandardScaler @@ -18,15 +18,15 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. # - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy # - Discriminator: differentiate between police-generated behaviors and behaviors from the motion dataset -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std, reduction) + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 1024), nn.ReLU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, # set a fixed log standard deviation for the policy self.log_std_parameter = nn.Parameter(torch.full((self.num_actions,), fill_value=-2.9), requires_grad=False) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return torch.tanh(self.net(states)), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 1024), nn.ReLU(), @@ -50,12 +51,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ReLU(), nn.Linear(512, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -class Discriminator(DeterministicModel): +class Discriminator(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 1024), nn.ReLU(), @@ -63,7 +65,7 @@ def __init__(self, observation_space, 
action_space, device, clip_actions=False): nn.ReLU(), nn.Linear(512, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -81,16 +83,17 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). # AMP requires 3 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.amp.html#spaces-and-models -models_amp = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device), - "discriminator": Discriminator(env.amp_observation_space, env.action_space, device)} +models_amp = {} +models_amp["policy"] = Policy(env.observation_space, env.action_space, device) +models_amp["value"] = Value(env.observation_space, env.action_space, device) +models_amp["discriminator"] = Discriminator(env.amp_observation_space, env.action_space, device) # Configure and instantiate the agent. # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.amp.html#configuration-and-hyperparameters cfg_amp = AMP_DEFAULT_CONFIG.copy() -cfg_amp["rollouts"] = 16 +cfg_amp["rollouts"] = 16 # memory_size cfg_amp["learning_epochs"] = 6 cfg_amp["mini_batches"] = 2 # 16 * 4096 / 32768 cfg_amp["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py index 4f852873..6a1d8cf9 100644 --- a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py +++ b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py @@ -2,10 +2,9 @@ import torch import torch.nn as nn -import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG @@ -13,46 +12,47 @@ from skrl.resources.noises.torch import GaussianNoise, OrnsteinUhlenbeckNoise from skrl.trainers.torch import ParallelTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 -# Define the models (stochastic and deterministic models) for the agents using helper classes -# and programming with two approaches (layer by layer and torch.nn.Sequential class). +# Define the models (stochastic and deterministic models) for the agents using mixins. 
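Editorial note: patch 069 above extends RunningStandardScaler so that 3D inputs (e.g. [rollout, environments, features]) contribute their per-batch mean and variance over the first two dimensions. Those per-batch statistics are then merged into the running ones with the standard parallel (Chan et al.) update; below is a hedged sketch of that merge with assumed variable names, not skrl's internals:

# Hedged sketch of the parallel mean/variance merge fed by the fix in patch 069
import torch

def merge_statistics(mean_a, var_a, count_a, mean_b, var_b, count_b):
    """Combine running (mean, variance, count) with a new batch's statistics."""
    delta = mean_b - mean_a
    total = count_a + count_b
    m2 = var_a * count_a + var_b * count_b + delta ** 2 * count_a * count_b / total
    return mean_a + delta * count_b / total, m2 / total, total

# a 3D batch x of shape (rollout, num_envs, features) would contribute
# torch.mean(x, dim=(0, 1)), torch.var(x, dim=(0, 1)) and x.shape[0] * x.shape[1] samples
x = torch.randn(16, 4096, 60)
batch_mean, batch_var, batch_count = torch.mean(x, dim=(0, 1)), torch.var(x, dim=(0, 1)), x.shape[0] * x.shape[1]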
# - StochasticActor: takes as input the environment's observation/state and returns an action # - DeterministicActor: takes as input the environment's observation/state and returns an action # - Critic: takes the state and action as input and provides a value to guide the policy -class StochasticActor(GaussianModel): +class StochasticActor(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.mean_action_layer = nn.Linear(32, self.num_actions) + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.action_layer = nn.Linear(32, self.num_actions) + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.action_layer(x)) + def compute(self, states, taken_actions, role): + return self.net(states) -class Critic(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 32), nn.ELU(), @@ -60,20 +60,14 @@ def __init__(self, observation_space, action_space, device, clip_actions = False nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(torch.cat([states, taken_actions], dim=1)) if __name__ == '__main__': - # Load and wrap the Isaac Gym environment. - # The following lines are intended to support all versions (preview 2, 3 and 4). 
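Editorial note: this DDPG/TD3/SAC example keeps GaussianNoise and OrnsteinUhlenbeckNoise among its imports for exploration. As a rough, framework-agnostic sketch of what an Ornstein-Uhlenbeck process produces (a mean-reverting random walk; this is not skrl's OrnsteinUhlenbeckNoise implementation, and the parameter values are placeholders):

# Illustrative Ornstein-Uhlenbeck exploration noise (not skrl's implementation)
import torch

class OUNoiseSketch:
    def __init__(self, size, theta=0.15, sigma=0.2, dt=1e-2, device="cpu"):
        self.theta, self.sigma, self.dt = theta, sigma, dt
        self.state = torch.zeros(size, device=device)

    def sample(self):
        # dx = -theta * x * dt + sigma * sqrt(dt) * N(0, 1)
        dx = -self.theta * self.state * self.dt \
             + self.sigma * (self.dt ** 0.5) * torch.randn_like(self.state)
        self.state = self.state + dx
        return self.state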
- # It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 - try: - env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader - except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Cartpole") + # Load and wrap the Isaac Gym environment + env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -88,25 +82,28 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). # DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models - models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "target_policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic": Critic(env.observation_space, env.action_space, device), - "target_critic": Critic(env.observation_space, env.action_space, device)} + models_ddpg = {} + models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) + models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) + models_ddpg["critic"] = Critic(env.observation_space, env.action_space, device) + models_ddpg["target_critic"] = Critic(env.observation_space, env.action_space, device) # TD3 requires 6 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models - models_td3 = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "target_policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic_1": Critic(env.observation_space, env.action_space, device), - "critic_2": Critic(env.observation_space, env.action_space, device), - "target_critic_1": Critic(env.observation_space, env.action_space, device), - "target_critic_2": Critic(env.observation_space, env.action_space, device)} + models_td3 = {} + models_td3["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) + models_td3["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) + models_td3["critic_1"] = Critic(env.observation_space, env.action_space, device) + models_td3["critic_2"] = Critic(env.observation_space, env.action_space, device) + models_td3["target_critic_1"] = Critic(env.observation_space, env.action_space, device) + models_td3["target_critic_2"] = Critic(env.observation_space, env.action_space, device) # SAC requires 5 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models - models_sac = {"policy": StochasticActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic_1": Critic(env.observation_space, env.action_space, device), - "critic_2": Critic(env.observation_space, env.action_space, device), - "target_critic_1": Critic(env.observation_space, env.action_space, device), - "target_critic_2": Critic(env.observation_space, env.action_space, device)} + models_sac = {} + models_sac["policy"] = StochasticActor(env.observation_space, env.action_space, device, clip_actions=True) + 
models_sac["critic_1"] = Critic(env.observation_space, env.action_space, device) + models_sac["critic_2"] = Critic(env.observation_space, env.action_space, device) + models_sac["target_critic_1"] = Critic(env.observation_space, env.action_space, device) + models_sac["target_critic_2"] = Critic(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ddpg.values(): diff --git a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py index 6a0b6bbb..a935e9c9 100644 --- a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py +++ b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py @@ -2,78 +2,53 @@ import torch import torch.nn as nn -import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel -from skrl.memories.torch import RandomMemory +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG from skrl.agents.torch.sac import SAC, SAC_DEFAULT_CONFIG -from skrl.resources.noises.torch import GaussianNoise, OrnsteinUhlenbeckNoise from skrl.trainers.torch import ParallelTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 -# Define the models (stochastic and deterministic models) for the agents using helper classes -# and programming with two approaches (layer by layer and torch.nn.Sequential class). 
-# - StochasticActor: takes as input the environment's observation/state and returns an action -# - DeterministicActor: takes as input the environment's observation/state and returns an action -# - Critic: takes the state and action as input and provides a value to guide the policy -class StochasticActor(GaussianModel): +# Define only the policies for evaluation +class StochasticActor(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.mean_action_layer = nn.Linear(32, self.num_actions) + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter - -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.action_layer = nn.Linear(32, self.num_actions) +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.action_layer(x)) - -class Critic(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 32), + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), nn.Linear(32, 32), nn.ELU(), - nn.Linear(32, 1)) + nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions): - return self.net(torch.cat([states, taken_actions], dim=1)) + def compute(self, states, taken_actions, role): + return self.net(states) if __name__ == '__main__': - # Load and wrap the Isaac Gym environment. - # The following lines are intended to support all versions (preview 2, 3 and 4). 
- # It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 - try: - env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader - except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Cartpole") + # Load and wrap the Isaac Gym environment + env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -82,18 +57,16 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). # DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models - models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True)} + models_ddpg = {} + models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) # TD3 requires 6 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models - models_td3 = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True)} + models_td3 = {} + models_td3["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) # SAC requires 5 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models - models_sac = {"policy": StochasticActor(env.observation_space, env.action_space, device, clip_actions=True)} - - # load checkpoints - models_ddpg["policy"].load("./runs/22-03-15_21-30-05-578065_DDPG/checkpoints/2000_policy.pt") - models_td3["policy"].load("./runs/22-03-15_21-30-05-401434_TD3/checkpoints/2000_policy.pt") - models_sac["policy"].load("./runs/22-03-15_21-30-05-596393_SAC/checkpoints/2000_policy.pt") + models_sac = {} + models_sac["policy"] = StochasticActor(env.observation_space, env.action_space, device, clip_actions=True) # Configure and instantiate the agents. 
@@ -138,6 +111,11 @@ def compute(self, states, taken_actions): action_space=env.action_space, device=device) + # load checkpoint (agent) + agent_ddpg.load("./runs/22-09-12_22-30-58-982355_DDPG/checkpoints/agent_8000.pt") + agent_td3.load("./runs/22-09-12_22-30-58-986295_TD3/checkpoints/agent_8000.pt") + agent_sac.load("./runs/22-09-12_22-30-58-987142_SAC/checkpoints/agent_8000.pt") + # Configure and instantiate the RL trainer and define the agent scopes cfg = {"timesteps": 8000, "headless": True} diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py index 38486e99..3cb0dc54 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py @@ -2,10 +2,9 @@ import torch import torch.nn as nn -import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG @@ -13,46 +12,47 @@ from skrl.resources.noises.torch import GaussianNoise, OrnsteinUhlenbeckNoise from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 -# Define the models (stochastic and deterministic models) for the agents using helper classes -# and programming with two approaches (layer by layer and torch.nn.Sequential class). +# Define the models (stochastic and deterministic models) for the agents using mixins. 
# - StochasticActor: takes as input the environment's observation/state and returns an action # - DeterministicActor: takes as input the environment's observation/state and returns an action # - Critic: takes the state and action as input and provides a value to guide the policy -class StochasticActor(GaussianModel): +class StochasticActor(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.mean_action_layer = nn.Linear(32, self.num_actions) + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.action_layer = nn.Linear(32, self.num_actions) + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.action_layer(x)) + def compute(self, states, taken_actions, role): + return self.net(states) -class Critic(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 32), nn.ELU(), @@ -60,18 +60,12 @@ def __init__(self, observation_space, action_space, device, clip_actions = False nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(torch.cat([states, taken_actions], dim=1)) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). 
-# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Cartpole") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -86,25 +80,28 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). # DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models -models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "target_policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic": Critic(env.observation_space, env.action_space, device), - "target_critic": Critic(env.observation_space, env.action_space, device)} +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_ddpg["critic"] = Critic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = Critic(env.observation_space, env.action_space, device) # TD3 requires 6 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models -models_td3 = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "target_policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic_1": Critic(env.observation_space, env.action_space, device), - "critic_2": Critic(env.observation_space, env.action_space, device), - "target_critic_1": Critic(env.observation_space, env.action_space, device), - "target_critic_2": Critic(env.observation_space, env.action_space, device)} +models_td3 = {} +models_td3["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_td3["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_td3["critic_1"] = Critic(env.observation_space, env.action_space, device) +models_td3["critic_2"] = Critic(env.observation_space, env.action_space, device) +models_td3["target_critic_1"] = Critic(env.observation_space, env.action_space, device) +models_td3["target_critic_2"] = Critic(env.observation_space, env.action_space, device) # SAC requires 5 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models -models_sac = {"policy": StochasticActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic_1": Critic(env.observation_space, env.action_space, device), - "critic_2": Critic(env.observation_space, env.action_space, device), - "target_critic_1": Critic(env.observation_space, env.action_space, device), - "target_critic_2": Critic(env.observation_space, env.action_space, device)} +models_sac = {} +models_sac["policy"] = StochasticActor(env.observation_space, env.action_space, device, clip_actions=True) +models_sac["critic_1"] = 
Critic(env.observation_space, env.action_space, device) +models_sac["critic_2"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_1"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_2"] = Critic(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ddpg.values(): diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py index f612710d..e775c3c7 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py @@ -2,57 +2,51 @@ import torch import torch.nn as nn -import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG from skrl.agents.torch.sac import SAC, SAC_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 -# Define only the policies for evaluation -class StochasticActor(GaussianModel): +# Define only the policies for evaluation +class StochasticActor(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) - - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.mean_action_layer = nn.Linear(32, self.num_actions) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter - -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) - - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.action_layer = nn.Linear(32, self.num_actions) - - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.action_layer(x)) - - -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). 
-# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Cartpole") + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states) + + +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -61,18 +55,16 @@ def compute(self, states, taken_actions): # Instantiate the agent's policies. # DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models -models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True)} +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) # TD3 requires 6 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models -models_td3 = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True)} +models_td3 = {} +models_td3["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) # SAC requires 5 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models -models_sac = {"policy": StochasticActor(env.observation_space, env.action_space, device, clip_actions=True)} - -# load checkpoints -models_ddpg["policy"].load("./runs/22-02-06_19-37-44-874837_DDPG/checkpoints/8000_policy.pt") -models_td3["policy"].load("./runs/22-02-06_19-28-48-436345_TD3/checkpoints/5000_policy.pt") -models_sac["policy"].load("./runs/22-02-06_19-28-48-441161_SAC/checkpoints/3000_policy.pt") +models_sac = {} +models_sac["policy"] = StochasticActor(env.observation_space, env.action_space, device, clip_actions=True) # Configure and instantiate the agents. 
@@ -117,6 +109,11 @@ def compute(self, states, taken_actions): action_space=env.action_space, device=device) +# load checkpoint (agent) +agent_ddpg.load("./runs/22-09-12_22-30-58-982355_DDPG/checkpoints/agent_8000.pt") +agent_td3.load("./runs/22-09-12_22-30-58-986295_TD3/checkpoints/agent_8000.pt") +agent_sac.load("./runs/22-09-12_22-30-58-987142_SAC/checkpoints/agent_8000.pt") + # Configure and instantiate the RL trainer cfg = {"timesteps": 8000, "headless": True} diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py index 18914141..ef93756e 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py @@ -2,10 +2,9 @@ import torch import torch.nn as nn -import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG @@ -13,46 +12,47 @@ from skrl.resources.noises.torch import GaussianNoise, OrnsteinUhlenbeckNoise from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 -# Define the models (stochastic and deterministic models) for the agents using helper classes -# and programming with two approaches (layer by layer and torch.nn.Sequential class). +# Define the models (stochastic and deterministic models) for the agents using mixins. 
# - StochasticActor: takes as input the environment's observation/state and returns an action # - DeterministicActor: takes as input the environment's observation/state and returns an action # - Critic: takes the state and action as input and provides a value to guide the policy -class StochasticActor(GaussianModel): +class StochasticActor(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.mean_action_layer = nn.Linear(32, self.num_actions) + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.action_layer = nn.Linear(32, self.num_actions) + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.action_layer(x)) + def compute(self, states, taken_actions, role): + return self.net(states) -class Critic(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 32), nn.ELU(), @@ -60,18 +60,12 @@ def __init__(self, observation_space, action_space, device, clip_actions = False nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(torch.cat([states, taken_actions], dim=1)) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). 
-# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Cartpole") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -84,25 +78,28 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). # DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models -models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "target_policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic": Critic(env.observation_space, env.action_space, device), - "target_critic": Critic(env.observation_space, env.action_space, device)} +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_ddpg["critic"] = Critic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = Critic(env.observation_space, env.action_space, device) # TD3 requires 6 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models -models_td3 = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "target_policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic_1": Critic(env.observation_space, env.action_space, device), - "critic_2": Critic(env.observation_space, env.action_space, device), - "target_critic_1": Critic(env.observation_space, env.action_space, device), - "target_critic_2": Critic(env.observation_space, env.action_space, device)} +models_td3 = {} +models_td3["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_td3["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_td3["critic_1"] = Critic(env.observation_space, env.action_space, device) +models_td3["critic_2"] = Critic(env.observation_space, env.action_space, device) +models_td3["target_critic_1"] = Critic(env.observation_space, env.action_space, device) +models_td3["target_critic_2"] = Critic(env.observation_space, env.action_space, device) # SAC requires 5 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models -models_sac = {"policy": StochasticActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic_1": Critic(env.observation_space, env.action_space, device), - "critic_2": Critic(env.observation_space, env.action_space, device), - "target_critic_1": Critic(env.observation_space, env.action_space, device), - "target_critic_2": Critic(env.observation_space, env.action_space, device)} +models_sac = {} +models_sac["policy"] = StochasticActor(env.observation_space, env.action_space, device, clip_actions=True) +models_sac["critic_1"] = 
Critic(env.observation_space, env.action_space, device) +models_sac["critic_2"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_1"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_2"] = Critic(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ddpg.values(): diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py index 6fc4d8fe..209a5d1c 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py @@ -2,57 +2,51 @@ import torch import torch.nn as nn -import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG from skrl.agents.torch.sac import SAC, SAC_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 -# Define only the policies for evaluation -class StochasticActor(GaussianModel): +# Define only the policies for evaluation +class StochasticActor(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) - - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.mean_action_layer = nn.Linear(32, self.num_actions) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter - -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) - - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.action_layer = nn.Linear(32, self.num_actions) - - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = F.elu(self.linear_layer_2(x)) - return torch.tanh(self.action_layer(x)) - - -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). 
-# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Cartpole") + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states) + + +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -61,18 +55,16 @@ def compute(self, states, taken_actions): # Instantiate the agent's policies. # DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models -models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True)} +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) # TD3 requires 6 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models -models_td3 = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True)} +models_td3 = {} +models_td3["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) # SAC requires 5 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models -models_sac = {"policy": StochasticActor(env.observation_space, env.action_space, device, clip_actions=True)} - -# load checkpoints -models_ddpg["policy"].load("./runs/22-02-06_19-37-44-874837_DDPG/checkpoints/8000_policy.pt") -models_td3["policy"].load("./runs/22-02-06_19-28-48-436345_TD3/checkpoints/5000_policy.pt") -models_sac["policy"].load("./runs/22-02-06_19-28-48-441161_SAC/checkpoints/3000_policy.pt") +models_sac = {} +models_sac["policy"] = StochasticActor(env.observation_space, env.action_space, device, clip_actions=True) # Configure and instantiate the agents. 
@@ -117,6 +109,11 @@ def compute(self, states, taken_actions): action_space=env.action_space, device=device) +# load checkpoint (agent) +agent_ddpg.load("./runs/22-09-12_22-30-58-982355_DDPG/checkpoints/agent_8000.pt") +agent_td3.load("./runs/22-09-12_22-30-58-986295_TD3/checkpoints/agent_8000.pt") +agent_sac.load("./runs/22-09-12_22-30-58-987142_SAC/checkpoints/agent_8000.pt") + # Configure and instantiate the RL trainer cfg = {"timesteps": 8000, "headless": True} diff --git a/docs/source/examples/isaacgym/ppo_allegro_hand.py b/docs/source/examples/isaacgym/ppo_allegro_hand.py index 52f793a8..927b8f5d 100644 --- a/docs/source/examples/isaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/isaacgym/ppo_allegro_hand.py @@ -5,7 +5,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. # - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -52,7 +53,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -76,8 +77,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -88,7 +90,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 8 +cfg_ppo["rollouts"] = 8 # memory_size cfg_ppo["learning_epochs"] = 5 cfg_ppo["mini_batches"] = 4 # 8 * 16384 / 32768 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_ant.py b/docs/source/examples/isaacgym/ppo_ant.py index 0f8bc4cc..ad2ce0bf 100644 --- a/docs/source/examples/isaacgym/ppo_ant.py +++ b/docs/source/examples/isaacgym/ppo_ant.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -52,18 +53,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(64, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Ant") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Ant") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Ant") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -76,8 +71,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -88,7 +84,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 16 +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 4 cfg_ppo["mini_batches"] = 2 # 16 * 4096 / 32768 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_anymal.py b/docs/source/examples/isaacgym/ppo_anymal.py index c076aee1..656cc3fe 100644 --- a/docs/source/examples/isaacgym/ppo_anymal.py +++ b/docs/source/examples/isaacgym/ppo_anymal.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -52,18 +53,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(64, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Anymal") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Anymal") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Anymal") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -76,8 +71,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -88,7 +84,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 24 +cfg_ppo["rollouts"] = 24 # memory_size cfg_ppo["learning_epochs"] = 5 cfg_ppo["mini_batches"] = 3 # 24 * 4096 / 32768 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_anymal_terrain.py b/docs/source/examples/isaacgym/ppo_anymal_terrain.py index 0b36e0bf..d64118f2 100644 --- a/docs/source/examples/isaacgym/ppo_anymal_terrain.py +++ b/docs/source/examples/isaacgym/ppo_anymal_terrain.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -52,18 +53,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="AnymalTerrain") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("AnymalTerrain") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="AnymalTerrain") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -76,8 +71,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -88,7 +84,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 24 +cfg_ppo["rollouts"] = 24 # memory_size cfg_ppo["learning_epochs"] = 5 cfg_ppo["mini_batches"] = 6 # 24 * 4096 / 16384 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_ball_balance.py b/docs/source/examples/isaacgym/ppo_ball_balance.py index 84e9999a..620af8ce 100644 --- a/docs/source/examples/isaacgym/ppo_ball_balance.py +++ b/docs/source/examples/isaacgym/ppo_ball_balance.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 128), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 128), nn.ELU(), @@ -52,18 +53,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="BallBalance") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("BallBalance") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="BallBalance") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -80,15 +75,16 @@ def compute(self, states, taken_actions): "value": Value(env.observation_space, env.action_space, device)} # Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Configure and instantiate the agent. 
# Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 16 +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 8 cfg_ppo["mini_batches"] = 8 # 16 * 4096 / 8192 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_cartpole.py b/docs/source/examples/isaacgym/ppo_cartpole.py index 260a298e..9525453c 100644 --- a/docs/source/examples/isaacgym/ppo_cartpole.py +++ b/docs/source/examples/isaacgym/ppo_cartpole.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. # - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), @@ -35,12 +35,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), @@ -48,18 +49,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). 
-# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Cartpole") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -72,8 +67,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -84,7 +80,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 16 +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 8 cfg_ppo["mini_batches"] = 1 # 16 * 512 / 8192 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_cartpole_eval.py b/docs/source/examples/isaacgym/ppo_cartpole_eval.py index 2c1c4a02..3ed1ff63 100644 --- a/docs/source/examples/isaacgym/ppo_cartpole_eval.py +++ b/docs/source/examples/isaacgym/ppo_cartpole_eval.py @@ -2,42 +2,36 @@ import torch import torch.nn as nn -import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel +from skrl.models.torch import Model, GaussianMixin from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 # Define only the policy for evaluation -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) - - self.linear_layer_1 = nn.Linear(self.num_observations, 32) - self.linear_layer_2 = nn.Linear(32, 32) - self.mean_action_layer = nn.Linear(32, self.num_actions) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): - x = F.elu(self.linear_layer_1(states)) - x = 
F.elu(self.linear_layer_2(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Cartpole") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -46,16 +40,16 @@ def compute(self, states, taken_actions): # Instantiate the agent's policy. # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device, clip_actions=True)} - -# load checkpoint -models_ppo["policy"].load("./runs/22-02-06_19-42-39-313520_PPO/checkpoints/8000_policy.pt") +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) # Configure and instantiate the agent. # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() cfg_ppo["random_timesteps"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} # logging to TensorBoard each 16 timesteps and ignore checkpoints cfg_ppo["experiment"]["write_interval"] = 16 cfg_ppo["experiment"]["checkpoint_interval"] = 0 @@ -67,9 +61,12 @@ def compute(self, states, taken_actions): action_space=env.action_space, device=device) +# load checkpoint (agent) +agent.load("./runs/22-09-12_18-56-10-110956_PPO/checkpoints/agent_1600.pt") + # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 8000, "headless": True} +cfg_trainer = {"timesteps": 1600, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # evaluate the agent diff --git a/docs/source/examples/isaacgym/ppo_franka_cabinet.py b/docs/source/examples/isaacgym/ppo_franka_cabinet.py index f2104a67..690ad036 100644 --- a/docs/source/examples/isaacgym/ppo_franka_cabinet.py +++ b/docs/source/examples/isaacgym/ppo_franka_cabinet.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. 
+# Define the models (stochastic and deterministic models) for the agent using mixins. # - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -52,18 +53,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(64, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="FrankaCabinet") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("FrankaCabinet") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="FrankaCabinet") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -76,8 +71,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
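# [editor's note] These example updates assume Isaac Gym preview 3/4 only and drop the preview 2
# fallback removed in the hunk above. If you still rely on preview 2, the previously shipped
# guard can be kept locally (sketch below, adapted for FrankaCabinet; it assumes your installed
# skrl version still exports `load_isaacgym_env_preview2`):
from skrl.envs.torch import wrap_env
from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4

try:
    env = load_isaacgym_env_preview4(task_name="FrankaCabinet")   # preview 3 and 4 use the same loader
except Exception as e:
    print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e))
    env = load_isaacgym_env_preview2("FrankaCabinet")
env = wrap_env(env)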
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -88,7 +84,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 16 +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 8 cfg_ppo["mini_batches"] = 8 # 16 * 4096 / 8192 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_humanoid.py b/docs/source/examples/isaacgym/ppo_humanoid.py index 4fa4d43d..1a1272f9 100644 --- a/docs/source/examples/isaacgym/ppo_humanoid.py +++ b/docs/source/examples/isaacgym/ppo_humanoid.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 400), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(100, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 400), nn.ELU(), @@ -52,18 +53,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(100, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Humanoid") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Humanoid") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Humanoid") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -76,8 +71,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -88,7 +84,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 32 +cfg_ppo["rollouts"] = 32 # memory_size cfg_ppo["learning_epochs"] = 5 cfg_ppo["mini_batches"] = 4 # 32 * 4096 / 32768 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_ingenuity.py b/docs/source/examples/isaacgym/ppo_ingenuity.py index b21af817..84c7570b 100644 --- a/docs/source/examples/isaacgym/ppo_ingenuity.py +++ b/docs/source/examples/isaacgym/ppo_ingenuity.py @@ -5,7 +5,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
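# [editor's note] The "# memory_size" and "# 32 * 4096 / 32768" comments added around the PPO
# configuration above document how the rl_games hyperparameters are mapped: `rollouts` is the
# per-environment memory size, and `mini_batches` is rollouts * num_envs / minibatch_size.
# A small sanity check for the Humanoid values (4096 parallel environments is an assumption
# taken from the comment; the actual count comes from the task configuration):
rollouts = 32           # transitions collected per environment before each update
num_envs = 4096         # parallel environments (assumed from the comment)
minibatch_size = 32768  # rl_games-style minibatch size being matched
mini_batches = rollouts * num_envs // minibatch_size
assert mini_batches == 4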
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -52,7 +53,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -76,8 +77,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -88,7 +90,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 16 +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 8 cfg_ppo["mini_batches"] = 4 # 16 * 4096 / 16384 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_quadcopter.py b/docs/source/examples/isaacgym/ppo_quadcopter.py index 883e31f4..06289885 100644 --- a/docs/source/examples/isaacgym/ppo_quadcopter.py +++ b/docs/source/examples/isaacgym/ppo_quadcopter.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -37,12 +37,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -52,18 +53,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Quadcopter") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Quadcopter") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Quadcopter") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -76,8 +71,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -88,7 +84,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 8 +cfg_ppo["rollouts"] = 8 # memory_size cfg_ppo["learning_epochs"] = 8 cfg_ppo["mini_batches"] = 4 # 8 * 8192 / 16384 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_shadow_hand.py b/docs/source/examples/isaacgym/ppo_shadow_hand.py index 24f29a7f..2f184912 100644 --- a/docs/source/examples/isaacgym/ppo_shadow_hand.py +++ b/docs/source/examples/isaacgym/ppo_shadow_hand.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -39,12 +39,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -56,18 +57,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="ShadowHand") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("ShadowHand") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="ShadowHand") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -80,8 +75,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -92,7 +88,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 8 +cfg_ppo["rollouts"] = 8 # memory_size cfg_ppo["learning_epochs"] = 5 cfg_ppo["mini_batches"] = 4 # 8 * 16384 / 32768 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/ppo_trifinger.py b/docs/source/examples/isaacgym/ppo_trifinger.py index 404f4a03..898d5bb6 100644 --- a/docs/source/examples/isaacgym/ppo_trifinger.py +++ b/docs/source/examples/isaacgym/ppo_trifinger.py @@ -4,14 +4,14 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -19,14 +19,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -39,12 +39,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -56,18 +57,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Trifinger") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Trifinger") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Trifinger") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -80,8 +75,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): @@ -92,7 +88,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 8 +cfg_ppo["rollouts"] = 8 # memory_size cfg_ppo["learning_epochs"] = 4 cfg_ppo["mini_batches"] = 8 # 8 * 16384 / 16384 cfg_ppo["discount_factor"] = 0.99 diff --git a/docs/source/examples/isaacgym/trpo_cartpole.py b/docs/source/examples/isaacgym/trpo_cartpole.py index 31266a55..8b0a76e7 100644 --- a/docs/source/examples/isaacgym/trpo_cartpole.py +++ b/docs/source/examples/isaacgym/trpo_cartpole.py @@ -4,13 +4,13 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.trpo import TRPO, TRPO_DEFAULT_CONFIG from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -from skrl.envs.torch import load_isaacgym_env_preview2, load_isaacgym_env_preview4 +from skrl.envs.torch import load_isaacgym_env_preview4 from skrl.utils import set_seed @@ -18,14 +18,14 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), @@ -34,12 +34,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), @@ -47,18 +48,12 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) -# Load and wrap the Isaac Gym environment. -# The following lines are intended to support all versions (preview 2, 3 and 4). -# It tries to load from preview 3/4, but if it fails, it will try to load from preview 2 -try: - env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader -except Exception as e: - print("Isaac Gym (preview 3/4) failed: {}\nTrying preview 2...".format(e)) - env = load_isaacgym_env_preview2("Cartpole") +# Load and wrap the Isaac Gym environment +env = load_isaacgym_env_preview4(task_name="Cartpole") # preview 3 and 4 use the same loader env = wrap_env(env) device = env.device @@ -71,8 +66,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# TRPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#spaces-and-models -models_trpo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} +models_trpo = {} +models_trpo["policy"] = Policy(env.observation_space, env.action_space, device) +models_trpo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_trpo.values(): @@ -83,7 +79,7 @@ def compute(self, states, taken_actions): # Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#configuration-and-hyperparameters cfg_trpo = TRPO_DEFAULT_CONFIG.copy() -cfg_trpo["rollouts"] = 16 +cfg_trpo["rollouts"] = 16 # memory_size cfg_trpo["learning_epochs"] = 6 cfg_trpo["mini_batches"] = 2 cfg_trpo["grad_norm_clip"] = 0.5 @@ -106,7 +102,7 @@ def compute(self, states, taken_actions): # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 2500, "headless": True} +cfg_trainer = {"timesteps": 1600, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training From d92c403e40bc79265ef502d8ab9f21a57b0aa8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 12 Sep 2022 23:05:58 +0200 Subject: [PATCH 071/108] Set role argument when calling policy get_log_std method --- skrl/agents/torch/trpo/trpo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index e9766f8c..fa9d65d0 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -432,11 +432,11 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor :rtype: torch.Tensor """ _, _, mu_1 = policy_1.act(states, taken_actions=None, role="policy") - logstd_1 = policy_1.get_log_std() + logstd_1 = policy_1.get_log_std(role="policy") mu_1, logstd_1 = mu_1.detach(), logstd_1.detach() _, _, mu_2 = policy_2.act(states, taken_actions=None, role="policy") - logstd_2 = policy_2.get_log_std() + logstd_2 = policy_2.get_log_std(role="policy") kl = logstd_1 - logstd_2 + 0.5 * (torch.square(logstd_1.exp()) + torch.square(mu_1 - mu_2)) \ / torch.square(logstd_2.exp()) - 0.5 From f9608d3e80061a6f756033a92026b767e533ba55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 12 Sep 2022 23:30:04 +0200 Subject: [PATCH 072/108] Update CHANGELOG --- CHANGELOG.md | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b48c1b53..bc1a765f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,29 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
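[Editor's note on the `kl_divergence` hunk in PATCH 071 above] For reference, the textbook closed form for the KL divergence between two diagonal Gaussian policies is

$$\mathrm{KL}\big(\mathcal{N}(\mu_1,\sigma_1^2)\,\|\,\mathcal{N}(\mu_2,\sigma_2^2)\big) = \sum_i \left[\log\frac{\sigma_{2,i}}{\sigma_{1,i}} + \frac{\sigma_{1,i}^2 + (\mu_{1,i}-\mu_{2,i})^2}{2\,\sigma_{2,i}^2} - \frac{1}{2}\right]$$

The patch only adds the `role="policy"` argument to `get_log_std`; the expression computed in that hunk is otherwise unchanged (note that its first term is written as `logstd_1 - logstd_2`, i.e. the opposite orientation of the log-ratio above).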
## [0.8.0] - Unreleased ### Added - AMP agent for physics-based character animation -- Gaussian model - Manual trainer +- Gaussian model mixin +- Support for creating shared models +- Parameter `role` to model methods +- Wrapper compatibility with the new OpenAI Gym environment API (by @JohannLange) +- Internal library colored logger +- Migrate checkpoints/models from other RL libraries to skrl models/agents +- Configuration parameter `store_separately` to agent configuration dict +- Save/load agent modules (models, optimizers, preprocessors) ### Changed -- Multivariate Gaussian model (`GaussianModel` until 0.7.0) to `MultivariateGaussianModel` +- Models implementation as Python mixin [**breaking change**] +- Multivariate Gaussian model (`GaussianModel` until 0.7.0) to `MultivariateGaussianMixin` - Trainer's `cfg` parameter position and default values +- Show training/evaluadion display progress using `tqdm` (by @JohannLange) + +### Fixed +- Missing recursive arguments during model weights initialization +- Tensor dimension when computing preprocessor parallel variance + +### Removed +- Parameter `inference` from model methods +- Configuration parameter `checkpoint_policy_only` from agent configuration dict ## [0.7.0] - 2022-07-11 ### Added From bbcbbbd8f321f99ecbbed03be152316fa367d1e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 12 Sep 2022 23:33:20 +0200 Subject: [PATCH 073/108] Update Isaac Gym and Omniverse Isaac Gym examples in docs --- .../examples/isaacgym/ppo_ball_balance.py | 7 +- docs/source/intro/examples.rst | 118 +++++++----------- 2 files changed, 51 insertions(+), 74 deletions(-) diff --git a/docs/source/examples/isaacgym/ppo_ball_balance.py b/docs/source/examples/isaacgym/ppo_ball_balance.py index 620af8ce..bacde4d7 100644 --- a/docs/source/examples/isaacgym/ppo_ball_balance.py +++ b/docs/source/examples/isaacgym/ppo_ball_balance.py @@ -71,14 +71,13 @@ def compute(self, states, taken_actions, role): # Instantiate the agent's models (function approximators). # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device), - "value": Value(env.observation_space, env.action_space, device)} - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution models_ppo = {} models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) models_ppo["value"] = Value(env.observation_space, env.action_space, device) +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_ppo.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. # Only modify some of the default configuration, visit its documentation to see all the options diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index ee9de042..a9abc232 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -273,10 +273,10 @@ The following components or practices are exemplified (highlighted):
-Learning in an Isaac Gym environment (one agent, multiple environments) ------------------------------------------------------------------------ +Learning in an Isaac Gym environment +------------------------------------ -These examples perform the training of an agent in the `Isaac Gym environments `_. Some scripts try to load the environment from preview 4 (or preview 3), but if they fail, they will try to load the environment from preview 2 +These examples perform the training of an agent in the `Isaac Gym environments `_ (**one agent, multiple environments**) .. image:: ../_static/imgs/example_isaacgym.png :width: 100% @@ -333,129 +333,115 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. tab:: AllegroHand - View the raw code: `ppo_allegro_hand.py `_ + :download:`ppo_allegro_hand.py <../examples/isaacgym/ppo_allegro_hand.py>` .. literalinclude:: ../examples/isaacgym/ppo_allegro_hand.py :language: python - :linenos: - :emphasize-lines: 2, 60-66 + :emphasize-lines: 2, 61-67 .. tab:: Ant - View the raw code: `ppo_ant.py `_ + :download:`ppo_ant.py <../examples/isaacgym/ppo_ant.py>` .. literalinclude:: ../examples/isaacgym/ppo_ant.py :language: python - :linenos: - :emphasize-lines: 13-14, 62-67 + :emphasize-lines: 13-14, 61-62 .. tab:: Anymal - View the raw code: `ppo_anymal.py `_ + :download:`ppo_anymal.py <../examples/isaacgym/ppo_anymal.py>` .. literalinclude:: ../examples/isaacgym/ppo_anymal.py :language: python - :linenos: - :emphasize-lines: 13-14, 62-67 + :emphasize-lines: 13-14, 61-62 .. tab:: AnymalTerrain - View the raw code: `ppo_anymal_terrain.py `_ + :download:`ppo_anymal_terrain.py <../examples/isaacgym/ppo_anymal_terrain.py>` .. literalinclude:: ../examples/isaacgym/ppo_anymal_terrain.py :language: python - :linenos: - :emphasize-lines: 11, 109-112 + :emphasize-lines: 11, 105-108 .. tab:: BallBalance - View the raw code: `ppo_ball_balance.py `_ + :download:`ppo_ball_balance.py <../examples/isaacgym/ppo_ball_balance.py>` .. literalinclude:: ../examples/isaacgym/ppo_ball_balance.py :language: python - :linenos: - :emphasize-lines: 11, 108-111 + :emphasize-lines: 11, 104-107 .. tab:: Cartpole - View the raw code: `ppo_cartpole.py `_ + :download:`ppo_cartpole.py <../examples/isaacgym/ppo_cartpole.py>` .. literalinclude:: ../examples/isaacgym/ppo_cartpole.py :language: python - :linenos: :emphasize-lines: 15, 19 .. tab:: Cartpole (TRPO) - View the raw code: `trpo_cartpole.py `_ + :download:`trpo_cartpole.py <../examples/isaacgym/trpo_cartpole.py>` .. literalinclude:: ../examples/isaacgym/trpo_cartpole.py :language: python - :linenos: :emphasize-lines: 14, 18 .. tab:: FrankaCabinet - View the raw code: `ppo_franka_cabinet.py `_ + :download:`ppo_franka_cabinet.py <../examples/isaacgym/ppo_franka_cabinet.py>` .. literalinclude:: ../examples/isaacgym/ppo_franka_cabinet.py :language: python - :linenos: - :emphasize-lines: 10, 97-98 + :emphasize-lines: 10, 93-94 .. tab:: Humanoid - View the raw code: `ppo_humanoid.py `_ + :download:`ppo_humanoid.py <../examples/isaacgym/ppo_humanoid.py>` .. literalinclude:: ../examples/isaacgym/ppo_humanoid.py :language: python - :linenos: - :emphasize-lines: 10, 97-98 + :emphasize-lines: 10, 93-94 .. tab:: Humanoid (AMP) - View the raw code: `amp_humanoid.py `_ + :download:`amp_humanoid.py <../examples/isaacgym/amp_humanoid.py>` .. literalinclude:: ../examples/isaacgym/amp_humanoid.py :language: python - :linenos: - :emphasize-lines: 86, 120, 131, 134-135 + :emphasize-lines: 89, 124, 135, 138-139 .. 
tab:: Ingenuity - View the raw code: `ppo_ingenuity.py `_ + :download:`ppo_ingenuity.py <../examples/isaacgym/ppo_ingenuity.py>` .. literalinclude:: ../examples/isaacgym/ppo_ingenuity.py :language: python - :linenos: - :emphasize-lines: 2, 60-66 + :emphasize-lines: 2, 61-67 .. tab:: Quadcopter - View the raw code: `ppo_quadcopter.py `_ + :download:`ppo_quadcopter.py <../examples/isaacgym/ppo_quadcopter.py>` .. literalinclude:: ../examples/isaacgym/ppo_quadcopter.py :language: python - :linenos: - :emphasize-lines: 108 + :emphasize-lines: 104 .. tab:: ShadowHand - View the raw code: `ppo_shadow_hand.py `_ + :download:`ppo_shadow_hand.py <../examples/isaacgym/ppo_shadow_hand.py>` .. literalinclude:: ../examples/isaacgym/ppo_shadow_hand.py :language: python - :linenos: - :emphasize-lines: 112 + :emphasize-lines: 108 .. tab:: Trifinger - View the raw code: `ppo_trifinger.py `_ + :download:`ppo_trifinger.py <../examples/isaacgym/ppo_trifinger.py>` .. literalinclude:: ../examples/isaacgym/ppo_trifinger.py :language: python - :linenos: - :emphasize-lines: 112 + :emphasize-lines: 108 .. tab:: Isaac Gym environments (evaluation) @@ -463,7 +449,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. tab:: Cartpole - View the raw code: `ppo_cartpole_eval.py `_ + :download:`ppo_cartpole_eval.py <../examples/isaacgym/ppo_cartpole_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -471,8 +457,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_cartpole_eval.py :language: python - :linenos: - :emphasize-lines: 49, 52, 76 + :emphasize-lines: 65 .. raw:: html @@ -589,10 +574,10 @@ The following components or practices are exemplified (highlighted):
-Learning in an Omniverse Isaac Gym environment (one agent, multiple environments) ---------------------------------------------------------------------------------- +Learning in an Omniverse Isaac Gym environment +---------------------------------------------- -These examples perform the training of an agent in the `Omniverse Isaac Gym environments `_ +These examples perform the training of an agent in the `Omniverse Isaac Gym environments `_ (**one agent, multiple environments**) .. image:: ../_static/imgs/example_omniverse_isaacgym.png :width: 100% @@ -643,66 +628,59 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. tab:: AllegroHand - View the raw code: `omniverse\: ppo_allegro_hand.py `_ + :download:`ppo_allegro_hand.py <../examples/omniisaacgym/ppo_allegro_hand.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_allegro_hand.py :language: python - :linenos: - :emphasize-lines: 11-12, 58-59 + :emphasize-lines: 11-12, 59-60 .. tab:: Ant - View the raw code: `omniverse\: ppo_ant.py `_ + :download:`ppo_ant.py <../examples/omniisaacgym/ppo_ant.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_ant.py :language: python - :linenos: - :emphasize-lines: 11-12, 58-59 + :emphasize-lines: 11-12, 59-60 .. tab:: Ant (multi-threaded) - View the raw code: `omniverse\: ppo_ant_mt.py `_ + :download:`ppo_ant_mt.py <../examples/omniisaacgym/ppo_ant_mt.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_ant_mt.py :language: python - :linenos: - :emphasize-lines: 1, 13-14, 60-61, 124, 128 + :emphasize-lines: 1, 13-14, 61-62, 126, 130 .. tab:: Cartpole - View the raw code: `omniverse\: ppo_cartpole.py `_ + :download:`ppo_cartpole.py <../examples/omniisaacgym/ppo_cartpole.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_cartpole.py :language: python - :linenos: - :emphasize-lines: 11-12, 54-55 + :emphasize-lines: 11-12, 55-56 .. tab:: Cartpole (multi-threaded) - View the raw code: `omniverse\: ppo_cartpole_mt.py `_ + :download:`ppo_cartpole_mt.py <../examples/omniisaacgym/ppo_cartpole_mt.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_cartpole_mt.py :language: python - :linenos: - :emphasize-lines: 1, 13-14, 56-57, 120, 124 + :emphasize-lines: 1, 13-14, 57-58, 122, 126 .. tab:: Humanoid - View the raw code: `omniverse\: ppo_humanoid.py `_ + :download:`ppo_humanoid.py <../examples/omniisaacgym/ppo_humanoid.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_humanoid.py :language: python - :linenos: - :emphasize-lines: 11-12, 58-59 + :emphasize-lines: 11-12, 59-60 .. tab:: ShadowHand - View the raw code: `omniverse\: ppo_shadow_hand.py `_ + :download:`ppo_shadow_hand.py <../examples/omniisaacgym/ppo_shadow_hand.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_shadow_hand.py :language: python - :linenos: - :emphasize-lines: 11-12, 62-63 + :emphasize-lines: 11-12, 63-64 .. 
raw:: html From c0fffe33daf1294c904b6320b0dc5ac53e35c11c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 12 Sep 2022 23:34:54 +0200 Subject: [PATCH 074/108] Update checkpoint section in docs --- docs/source/intro/data.rst | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/docs/source/intro/data.rst b/docs/source/intro/data.rst index 9deef70b..fa018b8f 100644 --- a/docs/source/intro/data.rst +++ b/docs/source/intro/data.rst @@ -23,7 +23,7 @@ Each agent offers the following parameters under the :literal:`"experiment"` key "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": False, # whether to store checkpoints separately } } @@ -126,13 +126,13 @@ Tracking custom metrics/scales ---------------- -Model checkpoint ----------------- +Checkpoints +----------- Saving checkpoints ^^^^^^^^^^^^^^^^^^ -The checkpoints are saved in the :literal:`checkpoints` subdirectory of the experiment's directory (its path can be customized using the options described in the previous subsection). The checkpoint name is the current timestep and the key referring to the model (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/2500_policy.pt`) +The checkpoints are saved in the :literal:`checkpoints` subdirectory of the experiment's directory (its path can be customized using the options described in the previous subsection). The checkpoint name is the key referring to the agent (or models, optimizers and preprocessors) and the current timestep (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/agent_2500.pt`) The checkpoint management, as in the previous case, is the responsibility of the agents (**can be customized independently for each agent using its configuration dictionary**) @@ -148,30 +148,30 @@ The checkpoint management, as in the previous case, is the responsibility of the "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": False, # whether to store checkpoints separately } } * **checkpoint_interval**: interval for checkpoints (default is 1000 timesteps). A value equal to or less than 0 disables the checkpoint creation -* **checkpoint_policy_only**: if set to :literal:`True`, only the policy will be saved (default behaviour), otherwise all the agent's models (policy, value function, critic, .etc) will be checkpointed +* **store_separately**: if set to :literal:`True`, all the modules that an agent contains (models, optimizers, preprocessors, etc.) will be saved each one in a separate file. By default (:literal:`False`) the modules are grouped in a dictionary and stored in the same file **Checkpointing the best models** -The best models, attending the mean total reward, will be saved in the :literal:`checkpoints` subdirectory of the experiment's directory. The checkpoint name is the word :literal:`best` and the key referring to the model (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/best_policy.pt`) +The best models, attending the mean total reward, will be saved in the :literal:`checkpoints` subdirectory of the experiment's directory. The checkpoint name is the word :literal:`best` and the key referring to the model (e.g. 
:literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/best_agent.pt`) -The best models are updated internally on each TensorBoard writing interval :literal:`"write_interval"` and they are saved on each checkpoint interval :literal:`"checkpoint_interval"`. The :literal:`"checkpoint_policy_only"` key specifies whether the best policy or the best models (policy, value function, critic, .etc) will be checkpointed +The best models are updated internally on each TensorBoard writing interval :literal:`"write_interval"` and they are saved on each checkpoint interval :literal:`"checkpoint_interval"`. The :literal:`"store_separately"` key specifies whether the best modules are grouped and stored together or separately Loading checkpoints ^^^^^^^^^^^^^^^^^^^ -Checkpoints can be loaded for each of the instantiated models independently via the :literal:`.load(...)` method (`Model.load <../modules/skrl.models.base_class.html#skrl.models.torch.base.Model.load>`_). It accepts the path (relative or absolute) of the checkpoint to load as the only argument. The checkpoint will be dynamically mapped to the device specified as argument in the class constructor (internally the torch load's :literal:`map_location` method is used during loading) +Checkpoints can be loaded for each of the instantiated agents (or models) independently via the :literal:`.load(...)` method (`Agent.load <../modules/skrl.agents.base_class.html#skrl.agents.torch.base.Agent.load>`_ or `Model.load <../modules/skrl.models.base_class.html#skrl.models.torch.base.Model.load>`_). It accepts the path (relative or absolute) of the checkpoint to load as the only argument. The checkpoint will be dynamically mapped to the device specified as argument in the class constructor (internally the torch load's :literal:`map_location` method is used during loading) .. note:: - The model instance must have the same architecture/structure as the one used to save the checkpoint. The current implementation load the model's `state_dict `_ directly + The agent or model instances must have the same architecture/structure as the ones used to save the checkpoint. The current implementation loads the model's `state_dict `_ directly -The following code shows how to load the checkpoint (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/2500_policy.pt`) of an instantiated policy from a specific definition. See the section :ref:`Examples ` for details about how to load control points and use them to evaluate experiments +The following code shows how to load the checkpoint (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/2500_policy.pt`) of an instantiated policy from a specific definition. See the :ref:`Examples ` section for details on how to load checkpoints and use them to continue training or evaluate experiments .. code-block:: python :emphasize-lines: 21 @@ -198,6 +198,11 @@ The following code shows how to load the checkpoint (e.g. :literal:`runs/22-01-0 # Load the checkpoint policy.load("./runs/22-01-09_22-48-49-816281_DDPG/checkpoints/2500_policy.pt") +Migrating external checkpoints +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is possible to load checkpoints generated with external reinforcement learning libraries into skrl agents (or models) via the :literal:`.migrate(...)` method (`Agent.migrate <../modules/skrl.agents.base_class.html#skrl.agents.torch.base.Agent.migrate>`_ or `Model.migrate <../modules/skrl.models.base_class.html#skrl.models.torch.base.Model.migrate>`_). 
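+
+The following snippet is a minimal sketch of this use case, continuing from the previous code block (the checkpoint file name is a placeholder and the :literal:`path` keyword argument is shown only for illustration; refer to the API reference linked above for the exact signature and supported options)
+
+.. code-block:: python
+
+    # continuing from the previous code block: "policy" is an instantiated skrl model
+    # migrate a checkpoint saved by an external library (placeholder file name)
+    policy.migrate(path="./external_checkpoint.pt")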
+ -------------------- Memory export/import From 9fb7cb494ba940debf9026b9a539862dda5b3068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 13 Sep 2022 18:25:42 +0200 Subject: [PATCH 075/108] Update DeepMind examples --- .../deepmind/dm_manipulation_stack_sac.py | 32 +++++++++--------- .../dm_suite_cartpole_swingup_ddpg.py | 33 ++++++++++--------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/docs/source/examples/deepmind/dm_manipulation_stack_sac.py b/docs/source/examples/deepmind/dm_manipulation_stack_sac.py index 7890429c..55cc071f 100644 --- a/docs/source/examples/deepmind/dm_manipulation_stack_sac.py +++ b/docs/source/examples/deepmind/dm_manipulation_stack_sac.py @@ -4,21 +4,21 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.sac import SAC, SAC_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -# Define the models (stochastic and deterministic models) for the SAC agent using the helper classes +# Define the models (stochastic and deterministic models) for the SAC agent using the mixins. # - StochasticActor (policy): takes as input the environment's observation/state and returns an action # - Critic: takes the state and action as input and provides a value to guide the policy -class StochasticActor(GaussianModel): +class StochasticActor(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.features_extractor = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=3), nn.ReLU(), @@ -40,7 +40,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): # The dm_control.manipulation tasks have as observation/state spec a `collections.OrderedDict` object as follows: # OrderedDict([('front_close', BoundedArray(shape=(1, 84, 84, 3), dtype=dtype('uint8'), name='front_close', minimum=0, maximum=255)), # ('jaco_arm/joints_pos', Array(shape=(1, 6, 2), dtype=dtype('float64'), name='jaco_arm/joints_pos')), @@ -83,9 +83,10 @@ def compute(self, states, taken_actions): input["jaco_arm/joints_pos"].view(states.shape[0], -1), input["jaco_arm/joints_vel"].view(states.shape[0], -1)], dim=-1))), self.log_std_parameter -class Critic(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.features_extractor = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=3), nn.ReLU(), @@ -105,7 +106,7 @@ def __init__(self, observation_space, action_space, device, clip_actions 
= False nn.ReLU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): # map the observations/states to the original space. # See the explanation above (StochasticActor.compute) input = self.tensor_to_space(states, self.observation_space) @@ -133,11 +134,12 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). # SAC requires 5 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models -models_sac = {"policy": StochasticActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic_1": Critic(env.observation_space, env.action_space, device), - "critic_2": Critic(env.observation_space, env.action_space, device), - "target_critic_1": Critic(env.observation_space, env.action_space, device), - "target_critic_2": Critic(env.observation_space, env.action_space, device)} +models_sac = {} +models_sac["policy"] = StochasticActor(env.observation_space, env.action_space, device, clip_actions=True) +models_sac["critic_1"] = Critic(env.observation_space, env.action_space, device) +models_sac["critic_2"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_1"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_2"] = Critic(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_sac.values(): diff --git a/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py b/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py index bc3c63db..f0469fb3 100644 --- a/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py +++ b/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py @@ -5,7 +5,7 @@ import torch.nn.functional as F # Import the skrl components to build the RL system -from skrl.models.torch import DeterministicModel +from skrl.models.torch import Model, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise @@ -13,26 +13,28 @@ from skrl.envs.torch import wrap_env -# Define the models (deterministic models) for the DDPG agent using a helper class -# and programming with two approaches (layer by layer and torch.nn.Sequential class). +# Define the models (deterministic models) for the DDPG agent using mixins +# and programming with two approaches (torch functional and torch.nn.Sequential class). 
# - Actor (policy): takes as input the environment's observation/state and returns an action # - Critic: takes the state and action as input and provides a value to guide the policy -class DeterministicActor(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.linear_layer_1 = nn.Linear(self.num_observations, 400) self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): x = F.relu(self.linear_layer_1(states)) x = F.relu(self.linear_layer_2(x)) return torch.tanh(self.action_layer(x)) -class DeterministicCritic(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class DeterministicCritic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 400), nn.ReLU(), @@ -40,7 +42,7 @@ def __init__(self, observation_space, action_space, device, clip_actions = False nn.ReLU(), nn.Linear(300, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(torch.cat([states, taken_actions], dim=1)) @@ -58,10 +60,11 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models -models_ddpg = {"policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "target_policy": DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True), - "critic": DeterministicCritic(env.observation_space, env.action_space, device), - "target_critic": DeterministicCritic(env.observation_space, env.action_space, device)} +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device, clip_actions=True) +models_ddpg["critic"] = DeterministicCritic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = DeterministicCritic(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ddpg.values(): From a1fceb43b95367640ded766239290c2c6935ab66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 13 Sep 2022 18:52:15 +0200 Subject: [PATCH 076/108] Update Isaac Gym and DeepMind examples in docs --- docs/source/intro/examples.rst | 55 ++++++++++++++-------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index a9abc232..ca0756ce 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -222,10 +222,10 @@ The following components or practices are exemplified (highlighted):
-Learning in a DeepMind environment (one agent, one environment) --------------------------------------------------------------- +Learning in a DeepMind environment +---------------------------------- -This example performs the training of one agent in an DeepMind environment +These examples perform the training of one agent in a DeepMind environment (**one agent, one environment**) .. image:: ../_static/imgs/example_deepmind.png :width: 100% @@ -249,12 +249,11 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - View the raw code: `dm_suite_cartpole_swingup_ddpg.py `_ + :download:`dm_suite_cartpole_swingup_ddpg.py <../examples/deepmind/dm_suite_cartpole_swingup_ddpg.py>` .. literalinclude:: ../examples/deepmind/dm_suite_cartpole_swingup_ddpg.py :language: python - :linenos: - :emphasize-lines: 1, 13, 48-49, 93 + :emphasize-lines: 1, 13, 50-51 .. tab:: manipulation:reach_site_vision (SAC) .. group-tab:: Training - View the raw code: `dm_manipulation_stack_sac.py `_ + :download:`dm_manipulation_stack_sac.py <../examples/deepmind/dm_manipulation_stack_sac.py>` .. literalinclude:: ../examples/deepmind/dm_manipulation_stack_sac.py :language: python - :linenos: - :emphasize-lines: 67, 80, 83-84, 111, 114, 117-118 + :emphasize-lines: 67, 80, 83-84, 112, 115, 118-119 .. raw:: html @@ -463,10 +461,10 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2
-Learning by scopes in an Isaac Gym environment (multiple agents and environments) ---------------------------------------------------------------------------------- +Learning by scopes in an Isaac Gym environment +---------------------------------------------- -This example performs the training of 3 agents by scopes in Isaac Gym's Cartpole environment in the same run. It tries to load the environment from preview 4 (or preview 3), but if it fails, it will try to load the environment from preview 2 +These examples perform the training of 3 agents by scopes in Isaac Gym's Cartpole environment in the same run (**multiple agents and environments**) .. image:: ../_static/imgs/example_parallel.jpg :width: 100% @@ -502,16 +500,15 @@ The following components or practices are exemplified (highlighted): .. tab:: Sequential training - View the raw code: `isaacgym_sequential_shared_memory.py `_ + :download:`isaacgym_sequential_shared_memory.py <../examples/isaacgym/isaacgym_sequential_shared_memory.py>` .. literalinclude:: ../examples/isaacgym/isaacgym_sequential_shared_memory.py :language: python - :linenos: - :emphasize-lines: 81, 152, 159, 166, 177-178 + :emphasize-lines: 75, 149, 156, 163, 174-175 .. tab:: Sequential evaluation - View the raw code: `isaacgym_sequential_shared_memory_eval.py `_ + :download:`isaacgym_sequential_shared_memory_eval.py <../examples/isaacgym/isaacgym_sequential_shared_memory_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -519,8 +516,7 @@ The following components or practices are exemplified (highlighted): .. literalinclude:: ../examples/isaacgym/isaacgym_sequential_shared_memory_eval.py :language: python - :linenos: - :emphasize-lines: 64, 67, 70, 73-75, 129 + :emphasize-lines: 113-115, 126 .. tab:: No shared memory @@ -528,25 +524,23 @@ The following components or practices are exemplified (highlighted): .. tab:: Sequential training - View the raw code: `isaacgym_sequential_no_shared_memory.py `_ + :download:`isaacgym_sequential_no_shared_memory.py <../examples/isaacgym/isaacgym_sequential_no_shared_memory.py>` .. literalinclude:: ../examples/isaacgym/isaacgym_sequential_no_shared_memory.py :language: python - :linenos: - :emphasize-lines: 81-83, 154, 161, 168, 179-180 + :emphasize-lines: 75-77, 151, 158, 165, 176-177 .. tab:: Parallel training - View the raw code: `isaacgym_parallel_no_shared_memory.py `_ + :download:`isaacgym_parallel_no_shared_memory.py <../examples/isaacgym/isaacgym_parallel_no_shared_memory.py>` .. literalinclude:: ../examples/isaacgym/isaacgym_parallel_no_shared_memory.py :language: python - :linenos: - :emphasize-lines: 14, 67, 179-182 + :emphasize-lines: 13, 67, 176-179 .. tab:: Sequential eval... - View the raw code: `isaacgym_sequential_no_shared_memory_eval.py `_ + :download:`isaacgym_sequential_no_shared_memory_eval.py <../examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -554,12 +548,11 @@ The following components or practices are exemplified (highlighted): .. literalinclude:: ../examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py :language: python - :linenos: - :emphasize-lines: 64, 67, 70, 73-75, 129 + :emphasize-lines: 113-115, 126 .. tab:: Parallel eval... 
- View the raw code: `isaacgym_parallel_no_shared_memory_eval.py `_ + :download:`isaacgym_parallel_no_shared_memory_eval.py <../examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -567,8 +560,7 @@ The following components or practices are exemplified (highlighted): .. literalinclude:: ../examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py :language: python - :linenos: - :emphasize-lines: 85, 88, 91, 94-96, 150 + :emphasize-lines: 115-117, 128 .. raw:: html @@ -799,11 +791,10 @@ This example shows how to use the library utilities to carry out the post-proces Example of a figure, generated by the code, showing the total reward (left) and the mean and standard deviation (right) of all experiments located in the runs folder - View the raw code: `tensorboard_file_iterator.py `_ + :download:`tensorboard_file_iterator.py <../examples/utils/tensorboard_file_iterator.py>` **Note:** The code will load all the Tensorboard files of the experiments located in the :literal:`runs` folder. It is necessary to adjust the iterator's parameters for other paths .. literalinclude:: ../examples/utils/tensorboard_file_iterator.py :language: python - :linenos: :emphasize-lines: 4, 11-13 From 3f4bb9415029d7ec1e640ba60f3b6627e0f9c9b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 13 Sep 2022 18:56:58 +0200 Subject: [PATCH 077/108] Set max and min clip values dtypes to float32 --- skrl/models/torch/deterministic.py | 4 ++-- skrl/models/torch/gaussian.py | 4 ++-- skrl/models/torch/multivariate_gaussian.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index 72996d96..a90cfaff 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -55,8 +55,8 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: self._d_clip_actions[role] = clip_actions and issubclass(type(self.action_space), gym.Space) if self._d_clip_actions[role]: - self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) - self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device) + self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32) + self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32) # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index 8e69a343..c56fce15 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -76,8 +76,8 @@ def __init__(self, self._g_clip_actions[role] = clip_actions and issubclass(type(self.action_space), gym.Space) if self._g_clip_actions[role]: - self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) - self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device) + self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32) + self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32) # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, 
(torch.__version__.split(".")[:2]))) < (1, 9) diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index b4d6e7b3..b2b85449 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -69,8 +69,8 @@ def __init__(self, self._mg_clip_actions[role] = clip_actions and issubclass(type(self.action_space), gym.Space) if self._mg_clip_actions[role]: - self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) - self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device) + self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32) + self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32) # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) From 5c9f476f799a861ee1b887d5d95af2f9c15387de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 13 Sep 2022 21:59:16 +0200 Subject: [PATCH 078/108] Update Isaac Sim examples --- .../isaacsim/cartpole_example_skrl.py | 24 +++++++++-------- .../examples/isaacsim/isaacsim_jetbot_ppo.py | 27 ++++++++++--------- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/docs/source/examples/isaacsim/cartpole_example_skrl.py b/docs/source/examples/isaacsim/cartpole_example_skrl.py index 5c4a16d0..d31c56e2 100644 --- a/docs/source/examples/isaacsim/cartpole_example_skrl.py +++ b/docs/source/examples/isaacsim/cartpole_example_skrl.py @@ -14,7 +14,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import GaussianModel, DeterministicModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.schedulers.torch import KLAdaptiveRL @@ -22,14 +22,14 @@ from skrl.envs.torch import wrap_env -# Define the models (stochastic and deterministic models) for the agent using helper classes. +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Linear(self.num_observations, 64), nn.Tanh(), @@ -38,12 +38,13 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return torch.tanh(self.net(states)), self.log_std_parameter -class Value(DeterministicModel): +class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): - super().__init__(observation_space, action_space, device, clip_actions) + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 64), nn.Tanh(), @@ -51,7 +52,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.Tanh(), nn.Linear(64, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): return self.net(states) @@ -68,8 +69,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device, clip_actions=True), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): diff --git a/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py b/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py index 539d69b9..84790767 100644 --- a/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py +++ b/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py @@ -5,22 +5,21 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import DeterministicModel, GaussianModel +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.memories.torch import RandomMemory from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.trainers.torch import SequentialTrainer from skrl.envs.torch import wrap_env -# Define the models (stochastic and deterministic models) for the agent using helper classes -# and programming with two approaches (layer by layer and torch.nn.Sequential class). +# Define the models (stochastic and deterministic models) for the agent using mixins. 
# - Policy: takes as input the environment's observation/state and returns an action # - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianModel): +class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): - super().__init__(observation_space, action_space, device, clip_actions, - clip_log_std, min_log_std, max_log_std) + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) self.net = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ReLU(), @@ -40,15 +39,16 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): # view (samples, width * height * channels) -> (samples, width, height, channels) # permute (samples, width, height, channels) -> (samples, channels, width, height) x = self.net(states.view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)) return 10 * torch.tanh(x), self.log_std_parameter # JetBotEnv action_space is -10 to 10 -class Value(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ReLU(), @@ -67,7 +67,7 @@ def __init__(self, observation_space, action_space, device, clip_actions = False nn.Tanh(), nn.Linear(32, 1)) - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): # view (samples, width * height * channels) -> (samples, width, height, channels) # permute (samples, width, height, channels) -> (samples, channels, width, height) return self.net(states.view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)) @@ -87,8 +87,9 @@ def compute(self, states, taken_actions): # Instantiate the agent's models (function approximators). 
# PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models -models_ppo = {"policy": Policy(env.observation_space, env.action_space, device, clip_actions=True), - "value": Value(env.observation_space, env.action_space, device)} +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): From d1d825522267e0df6ab843c427b8fd3a4c1d8506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 13 Sep 2022 22:00:32 +0200 Subject: [PATCH 079/108] Update Isaac Sim examples in docs --- docs/source/intro/examples.rst | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index ca0756ce..ea41d7e2 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -678,14 +678,14 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2
-Learning in an Omniverse Isaac Sim environment (one agent, one environment) ---------------------------------------------------------------------------- +Learning in an Omniverse Isaac Sim environment +---------------------------------------------- -These examples show how to train an agent in an Omniverse Isaac Sim environment that is implemented using the OpenAI Gym interface (one environment) +These examples show how to train an agent in an Omniverse Isaac Sim environment that is implemented using the OpenAI Gym interface (**one agent, one environment**) .. tabs:: - .. tab:: Isaac Sim 2022.1.0 (Cartpole) + .. tab:: Isaac Sim 2022.1.X (Cartpole) This example performs the training of an agent in the Isaac Sim's Cartpole environment described in the `Creating New RL Environment `_ tutorial @@ -703,11 +703,10 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment
- View the raw code: `cartpole_example_skrl.py `_ + :download:`cartpole_example_skrl.py <../examples/isaacsim/cartpole_example_skrl.py>` .. literalinclude:: ../examples/isaacsim/cartpole_example_skrl.py :language: python - :linenos: .. tab:: Isaac Sim 2021.2.1 (JetBot) @@ -763,12 +762,11 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment
- View the raw code: `isaacsim_jetbot_ppo.py `_ + :download:`isaacsim_jetbot_ppo.py <../examples/isaacsim/isaacsim_jetbot_ppo.py>` .. literalinclude:: ../examples/isaacsim/isaacsim_jetbot_ppo.py :language: python - :linenos: - :emphasize-lines: 19-47, 49-73 + :emphasize-lines: 24-39, 45, 53-68, 73 .. _library_utilities: From 1402290300164b4f89632ee7e67a0e6ee4379dd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 17 Sep 2022 10:10:42 +0200 Subject: [PATCH 080/108] Update snippets --- docs/source/snippets/agent.py | 55 +++++---- docs/source/snippets/model.py | 43 -------- docs/source/snippets/model_mixin.py | 104 ++++++++++++++++++ .../snippets/multivariate_gaussian_model.py | 9 -- docs/source/snippets/tabular_model.py | 26 +---- 5 files changed, 141 insertions(+), 96 deletions(-) delete mode 100644 docs/source/snippets/model.py create mode 100644 docs/source/snippets/model_mixin.py diff --git a/docs/source/snippets/agent.py b/docs/source/snippets/agent.py index 7b23b079..1f27f789 100644 --- a/docs/source/snippets/agent.py +++ b/docs/source/snippets/agent.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict +from typing import Union, Tuple, Dict, Any import gym @@ -17,7 +17,7 @@ "write_interval": 250, # TensorBoard writing interval (timesteps) "checkpoint_interval": 1000, # interval for checkpoints (timesteps) - "checkpoint_policy_only": True, # checkpoint for policy only + "store_separately": False, # whether to store checkpoints separately } } @@ -51,19 +51,25 @@ def __init__(self, action_space=action_space, device=device, cfg=CUSTOM_DEFAULT_CONFIG) - # ================================ + # ===================================================================== # - get and process models from self.models - # - create self.checkpoint_models dictionary for storing checkpoints + # - populate self.checkpoint_modules dictionary for storing checkpoints # - parse configurations from self.cfg - # - setup optimizers + # - setup optimizers and learning rate scheduler + # - set up preprocessors + # ===================================================================== + + def init(self) -> None: + """Initialize the agent + """ + super().init() + self.set_mode("eval") + # ================================================================= # - create tensors in memory if required - # ================================ + # - # create temporary variables needed for storage and computation + # ================================================================= - def act(self, - states: torch.Tensor, - timestep: int, - timesteps: int, - inference: bool = False) -> torch.Tensor: + def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy :param states: Environment's states @@ -78,17 +84,18 @@ def act(self, :return: Actions :rtype: torch.Tensor """ - # ================================ + # ====================================== # - sample random actions if required or # sample and return agent's actions - # ================================ + # ====================================== def record_transition(self, states: torch.Tensor, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + dones: torch.Tensor, + infos: Any, timestep: int, timesteps: int) -> None: """Record an environment transition in memory @@ -103,15 +110,17 @@ def record_transition(self, :type next_states: torch.Tensor :param dones: Signals to indicate that 
episodes have ended :type dones: torch.Tensor + :param infos: Additional information about the environment + :type infos: Any type supported by the environment :param timestep: Current timestep :type timestep: int :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, timestep, timesteps) - # ================================ + super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + # ======================================== # - record agent's specific data in memory - # ================================ + # ======================================== def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -121,9 +130,9 @@ def pre_interaction(self, timestep: int, timesteps: int) -> None: :param timesteps: Number of timesteps :type timesteps: int """ - # ================================ + # =================================== # - call self.update(...) if required - # ================================ + # =================================== def post_interaction(self, timestep: int, timesteps: int) -> None: """Callback called after the interaction with the environment @@ -133,9 +142,9 @@ def post_interaction(self, timestep: int, timesteps: int) -> None: :param timesteps: Number of timesteps :type timesteps: int """ - # ================================ + # =================================== # - call self.update(...) if required - # ================================ + # =================================== # call parent's method for checkpointing and TensorBoard writing super().post_interaction(timestep, timesteps) @@ -147,7 +156,7 @@ def _update(self, timestep: int, timesteps: int) -> None: :param timesteps: Number of timesteps :type timesteps: int """ - # ================================ + # ================================================= # - implement algorithm's update step # - record tracking data using self.track_data(...) - # ================================ + # ================================================= diff --git a/docs/source/snippets/model.py b/docs/source/snippets/model.py deleted file mode 100644 index e17e8b71..00000000 --- a/docs/source/snippets/model.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Union, Tuple - -import gym - -import torch - -from skrl.models.torch import Model # from . import Model - - -class CustomModel(Model): - def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0") -> None: - """ - :param observation_space: Observation/state space or shape (default: None). - If it is not None, the num_observations property will contain the size of that space (number of elements) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional - :param action_space: Action space or shape (default: None). 
- If it is not None, the num_actions property will contain the size of that space (number of elements) - :type action_space: int, tuple or list of integers, gym.Space or None, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") - :type device: str or torch.device, optional - """ - super().__init__(observation_space, action_space, device) - - def act(self, states: torch.Tensor, taken_actions: Union[torch.Tensor, None] = None, inference=False) -> Tuple[torch.Tensor]: - """Act in response to the state of the environment - - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: None). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor or None, optional - :param inference: Flag to indicate whether the model is making inference (default: False). - If True, the returned tensors will be detached from the current graph - :type inference: bool, optional - - :return: Action to be taken by the agent given the state of the environment. - The tuple's components are the actions, the log of the probability density function and mean actions - :rtype: tuple of torch.Tensor - """ - # ================================ - # - act in response to the state - # ================================ - \ No newline at end of file diff --git a/docs/source/snippets/model_mixin.py b/docs/source/snippets/model_mixin.py new file mode 100644 index 00000000..85c0af9e --- /dev/null +++ b/docs/source/snippets/model_mixin.py @@ -0,0 +1,104 @@ +# [start-model] +from typing import Optional, Union, Sequence + +import gym + +import torch + +from skrl.models.torch import Model # from . import Model + + +class CustomModel(Model): + def __init__(self, + observation_space: Union[int, Sequence[int], gym.Space], + action_space: Union[int, Sequence[int], gym.Space], + device: Union[str, torch.device] = "cuda:0") -> None: + """ + :param observation_space: Observation/state space or shape. + The ``num_observations`` property will contain the size of that space + :type observation_space: int, sequence of int, gym.Space + :param action_space: Action space or shape. + The ``num_actions`` property will contain the size of that space + :type action_space: int, sequence of int, gym.Space + :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) + :type device: str or torch.device, optional + """ + super().__init__(observation_space, action_space, device) + + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, + role: str = "") -> Sequence[torch.Tensor]: + """Act according to the specified behavior + + :param states: Observation/state of the environment used to make the decision + :type states: torch.Tensor + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). + The use of these actions only makes sense in critical models, e.g. + :type taken_actions: torch.Tensor, optional + :param role: Role play by the model (default: ``""``) + :type role: str, optional + + :raises NotImplementedError: Child class must implement this method + + :return: Action to be taken by the agent given the state of the environment. + The typical sequence's components are the actions, the log of the probability density function and mean actions. 
+ Deterministic agents must ignore the last two components and return empty tensors or None for them + :rtype: sequence of torch.Tensor + """ + # ============================== + # - act in response to the state + # ============================== +# [end-model] + +# ============================================================================= + +# [start-mixin] +from typing import Optional, Sequence + +import gym + +import torch + + +class CustomMixin: + def __init__(self, clip_actions: bool = False, role: str = "") -> None: + """ + :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: ``False``) + :type clip_actions: bool, optional + :param role: Role play by the model (default: ``""``) + :type role: str, optional + """ + # e.g. storage custom parameter + if not hasattr(self, "_custom_clip_actions"): + self._custom_clip_actions = {} + self._custom_clip_actions[role] + + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, + role: str = "") -> Sequence[torch.Tensor]: + """Act according to the specified behavior + + :param states: Observation/state of the environment used to make the decision + :type states: torch.Tensor + :param taken_actions: Actions taken by a policy to the given states (default: ``None``). + The use of these actions only makes sense in critical models, e.g. + :type taken_actions: torch.Tensor, optional + :param role: Role play by the model (default: ``""``) + :type role: str, optional + + :raises NotImplementedError: Child class must implement this method + + :return: Action to be taken by the agent given the state of the environment. + The typical sequence's components are the actions, the log of the probability density function and mean actions. + Deterministic agents must ignore the last two components and return empty tensors or None for them + :rtype: sequence of torch.Tensor + """ + # ============================== + # - act in response to the state + # ============================== + + # e.g. 
retrieve clip actions according to role + clip_actions = self._custom_clip_actions[role] if role in self._custom_clip_actions else self._custom_clip_actions[""] +# [end-mixin] \ No newline at end of file diff --git a/docs/source/snippets/multivariate_gaussian_model.py b/docs/source/snippets/multivariate_gaussian_model.py index d384271c..e53f7fe1 100644 --- a/docs/source/snippets/multivariate_gaussian_model.py +++ b/docs/source/snippets/multivariate_gaussian_model.py @@ -1,12 +1,3 @@ -import gym - -class DummyEnv: - observation_space = gym.spaces.Box(low=-1, high=1, shape=(5,)) - action_space = gym.spaces.Box(low=-1, high=1, shape=(3,)) - device = "cuda:0" - -env = DummyEnv() - # [start-mlp] import torch import torch.nn as nn diff --git a/docs/source/snippets/tabular_model.py b/docs/source/snippets/tabular_model.py index f3355daa..4b94c69c 100644 --- a/docs/source/snippets/tabular_model.py +++ b/docs/source/snippets/tabular_model.py @@ -1,30 +1,19 @@ -import gym - -class DummyEnv: - observation_space = gym.spaces.Discrete(4) - action_space = gym.spaces.Discrete(3) - device = "cuda:0" - num_envs = 2 - -env = DummyEnv() - # [start-epsilon-greedy] import torch -from skrl.models.torch import TabularModel +from skrl.models.torch import Model, TabularMixin # define the model -class EpilonGreedyPolicy(TabularModel): +class EpilonGreedyPolicy(TabularMixin, Model): def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): - super().__init__(observation_space, action_space, device, num_envs) + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) self.epsilon = epsilon self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32) - - self.tables["q_table"] = self.q_table - def compute(self, states, taken_actions): + def compute(self, states, taken_actions, role): actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) @@ -41,8 +30,3 @@ def compute(self, states, taken_actions): num_envs=env.num_envs, epsilon=0.15) # [end-epsilon-greedy] - -import torch -policy.to(env.device) -actions = policy.act(torch.tensor([[0, 1, 2, 3]], device=env.device)) -assert actions[0].shape == torch.Size([10, env.action_space.shape[0]]) From ee9f8ca5931f74b962e84bc05e49a956f9942695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 17 Sep 2022 10:14:20 +0200 Subject: [PATCH 081/108] Purge some docs --- .../source/modules/skrl.agents.base_class.rst | 5 +-- .../modules/skrl.memories.base_class.rst | 2 -- .../source/modules/skrl.models.base_class.rst | 22 ++++++++----- docs/source/modules/skrl.resources.noises.rst | 2 -- .../skrl.utils.omniverse_isaacgym_utils.rst | 33 +++++++++++++++++++ 5 files changed, 48 insertions(+), 16 deletions(-) create mode 100644 docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst diff --git a/docs/source/modules/skrl.agents.base_class.rst b/docs/source/modules/skrl.agents.base_class.rst index e7ad5637..4fea0caf 100644 --- a/docs/source/modules/skrl.agents.base_class.rst +++ b/docs/source/modules/skrl.agents.base_class.rst @@ -14,11 +14,8 @@ Basic inheritance usage .. tab:: Inheritance - View the raw code `here `_ - .. literalinclude:: ../snippets/agent.py :language: python - :linenos: API ^^^ @@ -27,7 +24,7 @@ API :undoc-members: :show-inheritance: :inherited-members: - :private-members: _update + :private-members: _update, _empty_preprocessor, _get_internal_value :members: .. 
automethod:: __init__ diff --git a/docs/source/modules/skrl.memories.base_class.rst b/docs/source/modules/skrl.memories.base_class.rst index a40f9afb..101e98dc 100644 --- a/docs/source/modules/skrl.memories.base_class.rst +++ b/docs/source/modules/skrl.memories.base_class.rst @@ -14,8 +14,6 @@ Basic inheritance usage .. tab:: Inheritance - View the raw code `here `_ - .. literalinclude:: ../snippets/memory.py :language: python :linenos: diff --git a/docs/source/modules/skrl.models.base_class.rst b/docs/source/modules/skrl.models.base_class.rst index 5ed11e5b..221221f3 100644 --- a/docs/source/modules/skrl.models.base_class.rst +++ b/docs/source/modules/skrl.models.base_class.rst @@ -7,18 +7,24 @@ Base class It provides the basic functionality for the other classes. **It is not intended to be used directly**. -Basic inheritance usage -^^^^^^^^^^^^^^^^^^^^^^^ +Mixin and inheritance +^^^^^^^^^^^^^^^^^^^^^ .. tabs:: - - .. tab:: Inheritance - View the raw code `here `_ + .. tab:: Mixin - .. literalinclude:: ../snippets/model.py + .. literalinclude:: ../snippets/model_mixin.py :language: python - :linenos: + :start-after: [start-mixin] + :end-before: [end-mixin] + + .. tab:: Model inheritance + + .. literalinclude:: ../snippets/model_mixin.py + :language: python + :start-after: [start-model] + :end-before: [end-model] API ^^^ @@ -26,7 +32,7 @@ API .. autoclass:: skrl.models.torch.base.Model :undoc-members: :show-inheritance: - :private-members: _get_space_size, _get_instantiator_output + :private-members: _get_space_size :members: .. automethod:: __init__ diff --git a/docs/source/modules/skrl.resources.noises.rst b/docs/source/modules/skrl.resources.noises.rst index a9450eb5..a988f0fe 100644 --- a/docs/source/modules/skrl.resources.noises.rst +++ b/docs/source/modules/skrl.resources.noises.rst @@ -117,8 +117,6 @@ Basic inheritance usage .. tab:: Inheritance - View the raw code `here `_ - .. literalinclude:: ../snippets/noise.py :language: python :linenos: diff --git a/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst b/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst new file mode 100644 index 00000000..806ed6d8 --- /dev/null +++ b/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst @@ -0,0 +1,33 @@ +Omniverse Isaac Gym utils +========================= + +.. contents:: Table of Contents + :depth: 2 + :local: + :backlinks: none + +.. raw:: html + +
+ +Control of robotic manipulators +------------------------------- + +Inverse kinematics using damped least squares method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This implementation attempts to unify under a single and reusable function the whole set of procedures used to calculate the inverse kinematics of a robotic manipulator shown originally in Isaac Gym's example: Franka IK Picking (:literal:`franka_cube_ik_osc.py`) but this time for Omniverse Isaac Gym + +:math:`\Delta\theta = J^T (JJ^T + \lambda^2 I)^{-1} \, \vec{e}` + +where + +| :math:`\qquad \Delta\theta \;` is the change in joint angles +| :math:`\qquad J \;` is the Jacobian +| :math:`\qquad \lambda \;` is a non-zero damping constant +| :math:`\qquad \vec{e} \;` is the Cartesian pose error (position and orientation) + +API +""" + +.. autofunction:: skrl.utils.omniverse_isaacgym_utils.ik From 2c7099cd99df0a1aa7771e83c8c541304d0ff752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 19 Sep 2022 14:37:08 +0200 Subject: [PATCH 082/108] Add Franka Emika example files --- .../reaching_franka_real_env.py | 276 ++++++++++++++++++ .../reaching_franka_real_skrl_eval.py | 88 ++++++ .../reaching_franka_sim_env.py | 262 +++++++++++++++++ .../reaching_franka_sim_skrl_eval.py | 92 ++++++ .../reaching_franka_sim_skrl_train.py | 133 +++++++++ 5 files changed, 851 insertions(+) create mode 100644 docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py create mode 100644 docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py create mode 100644 docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_env.py create mode 100644 docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_eval.py create mode 100644 docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_train.py diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py new file mode 100644 index 00000000..d67cfb0b --- /dev/null +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py @@ -0,0 +1,276 @@ +import gym +import time +import threading +import numpy as np +from packaging import version + +import frankx + + +class ReachingFranka(gym.Env): + def __init__(self, robot_ip="172.16.0.2", device="cuda:0", control_space="joint", motion_type="waypoint", camera_tracking=False): + # gym API + self._drepecated_api = version.parse(gym.__version__) < version.parse(" 0.25.0") + + self.device = device + self.control_space = control_space # joint or cartesian + self.motion_type = motion_type # waypoint or impedance + + if self.control_space == "cartesian" and self.motion_type == "impedance": + raise ValueError("Unsafe robot operation in cartesian/impedance configuration") + + # camera tracking (disabled by default) + self.camera_tracking = camera_tracking + if self.camera_tracking: + threading.Thread(target=self._update_target_from_camera).start() + + # spaces + self.observation_space = gym.spaces.Box(low=-1000, high=1000, shape=(18,), dtype=np.float32) + if self.control_space == "joint": + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) + elif self.control_space == "cartesian": + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32) + else: + raise ValueError("Invalid control space:", self.control_space) + + # init real franka + 
print("Connecting to robot at {}...".format(robot_ip)) + self.robot = frankx.Robot(robot_ip) + self.robot.set_default_behavior() + self.robot.recover_from_errors() + + # the robot's response can be better managed by independently setting the following properties, for example: + # - self.robot.velocity_rel = 0.2 + # - self.robot.acceleration_rel = 0.1 + # - self.robot.jerk_rel = 0.01 + self.robot.set_dynamic_rel(0.25) + + self.gripper = self.robot.get_gripper() + print("Robot connected") + + self.motion = None + self.motion_thread = None + + self.dt = 1 / 120.0 + self.action_scale = 2.5 + self.dof_vel_scale = 0.1 + self.max_episode_length = 100 + self.robot_dof_speed_scales = 1 + self.target_pos = np.array([0.65, 0.2, 0.2]) + self.robot_default_dof_pos = np.radians([0, -45, 0, -135, 0, 90, 45]) + self.robot_dof_lower_limits = np.array([-2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973]) + self.robot_dof_upper_limits = np.array([ 2.8973, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973]) + + self.progress_buf = 1 + self.obs_buf = np.zeros((18,), dtype=np.float32) + + def _update_target_from_camera(self): + pixel_to_meter = 1.11 / 375 # m/px: adjust for custom cases + + import cv2 + cap = cv2.VideoCapture(0) + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + # convert to HSV and remove noise + hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) + hsv = cv2.medianBlur(hsv, 15) + + # color matching in HSV + mask = cv2.inRange(hsv, np.array([80, 100, 100]), np.array([100, 255, 255])) + M = cv2.moments(mask) + if M["m00"]: + x = M["m10"] / M["m00"] + y = M["m01"] / M["m00"] + + # real-world position (fixed z to 0.2 meters) + pos = np.array([pixel_to_meter * (y - 185), pixel_to_meter * (x - 320), 0.2]) + if self is not None: + self.target_pos = pos + + # draw target + frame = cv2.circle(frame, (int(x), int(y)), 30, (0,0,255), 2) + frame = cv2.putText(frame, str(np.round(pos, 4).tolist()), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv2.LINE_AA) + + # show images + cv2.imshow("frame", frame) + cv2.imshow("mask", mask) + k = cv2.waitKey(1) & 0xFF + if k == ord('q'): + cap.release() + + def _get_observation_reward_done(self): + # get robot state + try: + robot_state = self.robot.get_state(read_once=True) + except frankx.InvalidOperationException: + robot_state = self.robot.get_state(read_once=False) + + # observation + robot_dof_pos = np.array(robot_state.q) + robot_dof_vel = np.array(robot_state.dq) + end_effector_pos = np.array(robot_state.O_T_EE[-4:-1]) + + dof_pos_scaled = 2.0 * (robot_dof_pos - self.robot_dof_lower_limits) / (self.robot_dof_upper_limits - self.robot_dof_lower_limits) - 1.0 + dof_vel_scaled = robot_dof_vel * self.dof_vel_scale + + self.obs_buf[0] = self.progress_buf / float(self.max_episode_length) + self.obs_buf[1:8] = dof_pos_scaled + self.obs_buf[8:15] = dof_vel_scaled + self.obs_buf[15:18] = self.target_pos + + # reward + distance = np.linalg.norm(end_effector_pos - self.target_pos) + reward = -distance + + # done + done = self.progress_buf >= self.max_episode_length - 1 + done = done or distance <= 0.075 + + print("Distance:", distance) + if done: + print("Target or Maximum episode length reached") + time.sleep(1) + + return self.obs_buf, reward, done + + def reset(self): + print("Reseting...") + + # end current motion + if self.motion is not None: + self.motion.finish() + self.motion_thread.join() + self.motion = None + self.motion_thread = None + + # open/close gripper + # self.gripper.open() + # self.gripper.clamp() + + # go to 1) 
safe position, 2) random position + self.robot.move(frankx.JointMotion(self.robot_default_dof_pos.tolist())) + dof_pos = self.robot_default_dof_pos + 0.25 * (np.random.rand(7) - 0.5) + self.robot.move(frankx.JointMotion(dof_pos.tolist())) + + # get target position from prompt + if not self.camera_tracking: + while True: + try: + print("Enter target position (X, Y, Z) in meters") + raw = input("or press [Enter] key for a random target position: ") + if raw: + self.target_pos = np.array([float(p) for p in raw.replace(' ', '').split(',')]) + else: + noise = (2 * np.random.rand(3) - 1) * np.array([0.25, 0.25, 0.10]) + self.target_pos = np.array([0.5, 0.0, 0.2]) + noise + print("Target position:", self.target_pos) + break + except ValueError: + print("Invalid input. Try something like: 0.65, 0.0, 0.2") + + # initial pose + affine = frankx.Affine(frankx.Kinematics.forward(dof_pos.tolist())) + affine = affine * frankx.Affine(x=0, y=0, z=-0.10335, a=np.pi/2) + + # motion type + if self.motion_type == "waypoint": + self.motion = frankx.WaypointMotion([frankx.Waypoint(affine)], return_when_finished=False) + elif self.motion_type == "impedance": + self.motion = frankx.ImpedanceMotion(500, 50) + else: + raise ValueError("Invalid motion type:", self.motion_type) + + self.motion_thread = self.robot.move_async(self.motion) + if self.motion_type == "impedance": + self.motion.target = affine + + input("Press [Enter] to continue") + + self.progress_buf = 0 + observation, reward, done = self._get_observation_reward_done() + + if self._drepecated_api: + return observation + else: + return observation, {} + + def step(self, action): + self.progress_buf += 1 + + # control space + # joint + if self.control_space == "joint": + # get robot state + try: + robot_state = self.robot.get_state(read_once=True) + except frankx.InvalidOperationException: + robot_state = self.robot.get_state(read_once=False) + # forward kinematics + dof_pos = np.array(robot_state.q) + (self.robot_dof_speed_scales * self.dt * action * self.action_scale) + affine = frankx.Affine(self.robot.forward_kinematics(dof_pos.flatten().tolist())) + affine = affine * frankx.Affine(x=0, y=0, z=-0.10335, a=np.pi/2) + # cartesian + elif self.control_space == "cartesian": + action /= 100.0 + if self.motion_type == "waypoint": + affine = frankx.Affine(x=action[0], y=action[1], z=action[2]) + elif self.motion_type == "impedance": + # get robot pose + try: + robot_pose = self.robot.current_pose(read_once=True) + except frankx.InvalidOperationException: + robot_pose = self.robot.current_pose(read_once=False) + affine = robot_pose * frankx.Affine(x=action[0], y=action[1], z=action[2]) + + # motion type + # waypoint motion + if self.motion_type == "waypoint": + if self.control_space == "joint": + self.motion.set_next_waypoint(frankx.Waypoint(affine)) + elif self.control_space == "cartesian": + self.motion.set_next_waypoint(frankx.Waypoint(affine, frankx.Waypoint.Relative)) + # impedance motion + elif self.motion_type == "impedance": + self.motion.target = affine + else: + raise ValueError("Invalid motion type:", self.motion_type) + + # the use of time.sleep is for simplicity. 
This does not guarantee control at a specific frequency + time.sleep(0.1) # lower frequency, at 30Hz there are discontinuities + + observation, reward, done = self._get_observation_reward_done() + + if self._drepecated_api: + return observation, reward, done, {} + else: + return observation, reward, done, done, {} + + def render(self, *args, **kwargs): + pass + + def close(self): + pass + + + + +if __name__ == "__main__": + + # test camera capturing + ReachingFranka._update_target_from_camera(None) + exit() + + + env = ReachingFranka() + + observation = env.reset() + for _ in range(100): + observation, reward, done, info = env.step(env.action_space.sample()) + env.render() + if done: + observation = env.reset() + + env.close() diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py new file mode 100644 index 00000000..0d64487e --- /dev/null +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py @@ -0,0 +1,88 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define only the policy for evaluation +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + + +# Load the environment +from reaching_franka_real_env import ReachingFranka + +control_space = "joint" # joint or cartesian +motion_type = "waypoint" # waypoint or impedance +camera_tracking = False # True for USB-camera tracking + +env = ReachingFranka(robot_ip="172.16.0.2", + device="cpu", + control_space=control_space, + motion_type=motion_type, + camera_tracking=camera_tracking) + +# wrap the environment +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's policy. +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) + +# Configure and instantiate the agent. 
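+# Note: for evaluation the agent runs without a rollout memory (no training takes place) and
+# keeps the same RunningStandardScaler state preprocessor used during training, so that the
+# observations fed to the loaded checkpoint are normalized consistently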
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +# logging to TensorBoard each 32 timesteps an ignore checkpoints +cfg_ppo["experiment"]["write_interval"] = 32 +cfg_ppo["experiment"]["checkpoint_interval"] = 0 + +agent = PPO(models=models_ppo, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoints +if control_space == "joint": + agent.load("./agent_joint.pt") +elif control_space == "cartesian": + agent.load("./agent_cartesian.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 1000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start evaluation +trainer.eval() diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_env.py new file mode 100644 index 00000000..d530abc7 --- /dev/null +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_env.py @@ -0,0 +1,262 @@ +import torch +import numpy as np + +from omniisaacgymenvs.tasks.base.rl_task import RLTask +from omniisaacgymenvs.robots.articulations.franka import Franka as Robot + +from omni.isaac.core.prims import RigidPrimView, XFormPrimView +from omni.isaac.core.articulations import ArticulationView +from omni.isaac.core.objects import VisualSphere +from omni.isaac.core.utils.prims import get_prim_at_path + +from skrl.utils import omniverse_isaacgym_utils + +# post_physics_step calls +# - get_observations() +# - get_states() +# - calculate_metrics() +# - is_done() +# - get_extras() + + +TASK_CFG = {"test": False, + "device_id": 0, + "headless": True, + "sim_device": "gpu", + "task": {"name": "ReachingFranka", + "physics_engine": "physx", + "env": {"numEnvs": 1024, + "envSpacing": 1.5, + "episodeLength": 100, + "enableDebugVis": False, + "clipObservations": 1000.0, + "clipActions": 1.0, + "controlFrequencyInv": 4, + "actionScale": 2.5, + "dofVelocityScale": 0.1, + "controlSpace": "cartesian"}, + "sim": {"dt": 0.0083, # 1 / 120 + "use_gpu_pipeline": True, + "gravity": [0.0, 0.0, -9.81], + "add_ground_plane": True, + "use_flatcache": True, + "enable_scene_query_support": False, + "enable_cameras": False, + "default_physics_material": {"static_friction": 1.0, + "dynamic_friction": 1.0, + "restitution": 0.0}, + "physx": {"worker_thread_count": 4, + "solver_type": 1, + "use_gpu": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "contact_offset": 0.005, + "rest_offset": 0.0, + "bounce_threshold_velocity": 0.2, + "friction_offset_threshold": 0.04, + "friction_correlation_distance": 0.025, + "enable_sleeping": True, + "enable_stabilization": True, + "max_depenetration_velocity": 1000.0, + "gpu_max_rigid_contact_count": 524288, + "gpu_max_rigid_patch_count": 33554432, + "gpu_found_lost_pairs_capacity": 524288, + "gpu_found_lost_aggregate_pairs_capacity": 262144, + "gpu_total_aggregate_pairs_capacity": 1048576, + "gpu_max_soft_body_contacts": 1048576, + "gpu_max_particle_contacts": 1048576, + "gpu_heap_capacity": 33554432, + 
"gpu_temp_buffer_capacity": 16777216, + "gpu_max_num_partitions": 8}, + "robot": {"override_usd_defaults": False, + "fixed_base": False, + "enable_self_collisions": False, + "enable_gyroscopic_forces": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "sleep_threshold": 0.005, + "stabilization_threshold": 0.001, + "density": -1, + "max_depenetration_velocity": 1000.0, + "contact_offset": 0.005, + "rest_offset": 0.0}}}} + + +class RobotView(ArticulationView): + def __init__(self, prim_paths_expr: str, name: str = "robot_view") -> None: + super().__init__(prim_paths_expr=prim_paths_expr, name=name, reset_xform_properties=False) + + +class ReachingFrankaTask(RLTask): + def __init__(self, name, sim_config, env, offset=None) -> None: + self._sim_config = sim_config + self._cfg = sim_config.config + self._task_cfg = sim_config.task_config + + self.dt = 1 / 120.0 + + self._num_envs = self._task_cfg["env"]["numEnvs"] + self._env_spacing = self._task_cfg["env"]["envSpacing"] + self._action_scale = self._task_cfg["env"]["actionScale"] + self._dof_vel_scale = self._task_cfg["env"]["dofVelocityScale"] + self._max_episode_length = self._task_cfg["env"]["episodeLength"] + self._control_space = self._task_cfg["env"]["controlSpace"] + + # observation and action space + self._num_observations = 18 + if self._control_space == "joint": + self._num_actions = 7 + elif self._control_space == "cartesian": + self._num_actions = 3 + else: + raise ValueError("Invalid control space: {}".format(self._control_space)) + + self._end_effector_link = "panda_leftfinger" + + RLTask.__init__(self, name, env) + + def set_up_scene(self, scene) -> None: + self.get_robot() + self.get_target() + + super().set_up_scene(scene) + + # robot view + self._robots = RobotView(prim_paths_expr="/World/envs/.*/robot", name="robot_view") + scene.add(self._robots) + # end-effectors view + self._end_effectors = RigidPrimView(prim_paths_expr="/World/envs/.*/robot/{}".format(self._end_effector_link), name="end_effector_view") + scene.add(self._end_effectors) + # hands view (cartesian) + if self._control_space == "cartesian": + self._hands = RigidPrimView(prim_paths_expr="/World/envs/.*/robot/panda_hand", name="hand_view", reset_xform_properties=False) + scene.add(self._hands) + # target view + self._targets = XFormPrimView(prim_paths_expr="/World/envs/.*/target", name="target_view", reset_xform_properties=False) + scene.add(self._targets) + + self.init_data() + + def get_robot(self): + robot = Robot(prim_path=self.default_zero_env_path + "/robot", + translation=torch.tensor([0.0, 0.0, 0.0]), + orientation=torch.tensor([1.0, 0.0, 0.0, 0.0]), + name="robot") + self._sim_config.apply_articulation_settings("robot", get_prim_at_path(robot.prim_path), self._sim_config.parse_actor_config("robot")) + + def get_target(self): + target = VisualSphere(prim_path=self.default_zero_env_path + "/target", + name="target", + radius=0.025, + color=torch.tensor([1, 0, 0])) + target.set_collision_enabled(False) + + def init_data(self) -> None: + self.robot_default_dof_pos = torch.tensor(np.radians([0, -45, 0, -135, 0, 90, 45, 0, 0]), device=self._device, dtype=torch.float32) + self.actions = torch.zeros((self._num_envs, self.num_actions), device=self._device) + + if self._control_space == "cartesian": + self.jacobians = torch.zeros((self._num_envs, 10, 6, 9), device=self._device) + self.hand_pos, self.hand_rot = torch.zeros((self._num_envs, 3), device=self._device), torch.zeros((self._num_envs, 4), device=self._device) + + def 
get_observations(self) -> dict: + robot_dof_pos = self._robots.get_joint_positions(clone=False) + robot_dof_vel = self._robots.get_joint_velocities(clone=False) + end_effector_pos, end_effector_rot = self._end_effectors.get_local_poses() + target_pos, target_rot = self._targets.get_local_poses() + + dof_pos_scaled = 2.0 * (robot_dof_pos - self.robot_dof_lower_limits) \ + / (self.robot_dof_upper_limits - self.robot_dof_lower_limits) - 1.0 + dof_vel_scaled = robot_dof_vel * self._dof_vel_scale + + generalization_noise = torch.rand((dof_vel_scaled.shape[0], 7), device=self._device) + 0.5 + + self.obs_buf[:, 0] = self.progress_buf / self._max_episode_length + self.obs_buf[:, 1:8] = dof_pos_scaled[:, :7] + self.obs_buf[:, 8:15] = dof_vel_scaled[:, :7] * generalization_noise + self.obs_buf[:, 15:18] = target_pos + + # compute distance for calculate_metrics() and is_done() + self._computed_distance = torch.norm(end_effector_pos - target_pos, dim=-1) + + if self._control_space == "cartesian": + self.jacobians = self._robots.get_jacobians(clone=False) + self.hand_pos, self.hand_rot = self._hands.get_local_poses() + + return {self._robots.name: {"obs_buf": self.obs_buf}} + + def pre_physics_step(self, actions) -> None: + reset_env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1) + if len(reset_env_ids) > 0: + self.reset_idx(reset_env_ids) + + self.actions = actions.clone().to(self._device) + env_ids_int32 = torch.arange(self._robots.count, dtype=torch.int32, device=self._device) + + if self._control_space == "joint": + targets = self.robot_dof_targets[:, :7] + self.robot_dof_speed_scales[:7] * self.dt * self.actions * self._action_scale + + elif self._control_space == "cartesian": + goal_position = self.hand_pos + actions / 100.0 + delta_dof_pos = omniverse_isaacgym_utils.ik(jacobian_end_effector=self.jacobians[:, 8 - 1, :, :7], # franka hand index: 8 + current_position=self.hand_pos, + current_orientation=self.hand_rot, + goal_position=goal_position, + goal_orientation=None) + targets = self.robot_dof_targets[:, :7] + delta_dof_pos + + self.robot_dof_targets[:, :7] = torch.clamp(targets, self.robot_dof_lower_limits[:7], self.robot_dof_upper_limits[:7]) + self.robot_dof_targets[:, 7:] = 0 + self._robots.set_joint_position_targets(self.robot_dof_targets, indices=env_ids_int32) + + def reset_idx(self, env_ids) -> None: + indices = env_ids.to(dtype=torch.int32) + + # reset robot + pos = torch.clamp(self.robot_default_dof_pos.unsqueeze(0) + 0.25 * (torch.rand((len(env_ids), self.num_robot_dofs), device=self._device) - 0.5), + self.robot_dof_lower_limits, self.robot_dof_upper_limits) + dof_pos = torch.zeros((len(indices), self._robots.num_dof), device=self._device) + dof_pos[:, :] = pos + dof_pos[:, 7:] = 0 + dof_vel = torch.zeros((len(indices), self._robots.num_dof), device=self._device) + self.robot_dof_targets[env_ids, :] = pos + self.robot_dof_pos[env_ids, :] = pos + + self._robots.set_joint_position_targets(self.robot_dof_targets[env_ids], indices=indices) + self._robots.set_joint_positions(dof_pos, indices=indices) + self._robots.set_joint_velocities(dof_vel, indices=indices) + + # reset target + pos = (torch.rand((len(env_ids), 3), device=self._device) - 0.5) * 2 + pos[:, 0] = 0.50 + pos[:, 0] * 0.25 + pos[:, 1] = 0.00 + pos[:, 1] * 0.25 + pos[:, 2] = 0.20 + pos[:, 2] * 0.10 + + self._targets.set_local_poses(pos, indices=indices) + + # bookkeeping + self.reset_buf[env_ids] = 0 + self.progress_buf[env_ids] = 0 + + def post_reset(self): + self.num_robot_dofs = self._robots.num_dof + 
self.robot_dof_pos = torch.zeros((self.num_envs, self.num_robot_dofs), device=self._device) + dof_limits = self._robots.get_dof_limits() + self.robot_dof_lower_limits = dof_limits[0, :, 0].to(device=self._device) + self.robot_dof_upper_limits = dof_limits[0, :, 1].to(device=self._device) + self.robot_dof_speed_scales = torch.ones_like(self.robot_dof_lower_limits) + self.robot_dof_targets = torch.zeros((self._num_envs, self.num_robot_dofs), dtype=torch.float, device=self._device) + + # randomize all envs + indices = torch.arange(self._num_envs, dtype=torch.int64, device=self._device) + self.reset_idx(indices) + + def calculate_metrics(self) -> None: + self.rew_buf[:] = -self._computed_distance + + def is_done(self) -> None: + self.reset_buf.fill_(0) + # target reached + self.reset_buf = torch.where(self._computed_distance <= 0.035, torch.ones_like(self.reset_buf), self.reset_buf) + # max episode length + self.reset_buf = torch.where(self.progress_buf >= self._max_episode_length - 1, torch.ones_like(self.reset_buf), self.reset_buf) diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_eval.py new file mode 100644 index 00000000..f9283779 --- /dev/null +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_eval.py @@ -0,0 +1,92 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.utils.omniverse_isaacgym_utils import get_env_instance +from skrl.envs.torch import wrap_env + + +# Define only the policy for evaluation +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + + +# instance VecEnvBase and setup task +headless = True # set headless to False for rendering +env = get_env_instance(headless=headless) + +from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig +from reaching_franka_sim_env import ReachingFrankaTask, TASK_CFG + +TASK_CFG["headless"] = headless +TASK_CFG["task"]["env"]["numEnvs"] = 64 +TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + +sim_config = SimConfig(TASK_CFG) +task = ReachingFrankaTask(name="ReachingFranka", sim_config=sim_config, env=env) +env.set_task(task=task, sim_params=sim_config.get_physics_params(), backend="torch", init_sim=True) + +# wrap the environment +env = wrap_env(env, "omniverse-isaacgym") + +device = env.device + + +# Instantiate the agent's policy. 
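+# For evaluation only the policy network is instantiated; the value model and the rollout
+# memory used during training are not required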
+# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +# logging to TensorBoard each 32 timesteps an ignore checkpoints +cfg_ppo["experiment"]["write_interval"] = 32 +cfg_ppo["experiment"]["checkpoint_interval"] = 0 + +agent = PPO(models=models_ppo, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoints +if TASK_CFG["task"]["env"]["controlSpace"] == "joint": + agent.load("./agent_joint.pt") +elif TASK_CFG["task"]["env"]["controlSpace"] == "cartesian": + agent.load("./agent_cartesian.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 5000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start evaluation +trainer.eval() diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_train.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_train.py new file mode 100644 index 00000000..b9a31852 --- /dev/null +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_train.py @@ -0,0 +1,133 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.utils.omniverse_isaacgym_utils import get_env_instance +from skrl.envs.torch import wrap_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the models (stochastic and deterministic models) for the agent using helper mixin. 
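+# (the two models below use an identical 256-128-64 ELU architecture; the policy additionally
+# learns a state-independent log standard deviation for its Gaussian action distribution)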
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, 1)) + + def compute(self, states, taken_actions, role): + return self.net(states) + + +# instance VecEnvBase and setup task +headless = True # set headless to False for rendering +env = get_env_instance(headless=headless) + +from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig +from reaching_franka_sim_env import ReachingFrankaTask, TASK_CFG + +TASK_CFG["headless"] = headless +TASK_CFG["task"]["env"]["numEnvs"] = 1024 +TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + +sim_config = SimConfig(TASK_CFG) +task = ReachingFrankaTask(name="ReachingFranka", sim_config=sim_config, env=env) +env.set_task(task=task, sim_params=sim_config.get_physics_params(), backend="torch", init_sim=True) + +# wrap the environment +env = wrap_env(env, "omniverse-isaacgym") + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
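+# Sizing note, derived from the values configured in this script: 1024 environments * 16 rollout
+# timesteps = 16384 transitions per PPO update, split into 8 mini-batches (2048 samples each)
+# and iterated over 8 learning epochs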
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 16 +cfg_ppo["learning_epochs"] = 8 +cfg_ppo["mini_batches"] = 8 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 5e-4 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 2.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 32 and 250 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 32 +cfg_ppo["experiment"]["checkpoint_interval"] = 250 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 5000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() From 1779eebfa8a39bfaa30eee4e091ff7906f798cac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 19 Sep 2022 16:22:49 +0200 Subject: [PATCH 083/108] Add Franka Emika example to docs --- docs/source/intro/examples.rst | 153 +++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index ea41d7e2..0a365f44 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -768,6 +768,159 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment :language: python :emphasize-lines: 24-39, 45, 53-68, 73 +Real-world examples +------------------- + +These examples show basic real-world use cases to guide and support advanced RL implementations + +.. tabs:: + + .. tab:: Franka Emika Panda + + **3D reaching task (Franka's gripper must reach a certain target point in space)**. The training was done in Omniverse Isaac Gym. The real robot control is performed through the Python API of a modified version of frankx (see `frankx's pull request #42 `_), a high-level motion library around libfranka. Training and evaluation is performed for both Cartesian and joint control space + + .. raw:: html + +
+ + **Implementation** (see details in the table below): + + * The observation space is composed of the episode's normalized progress, the robot joints' normalized positions (:math:`q`) in the interval -1 to 1, the robot joints' velocities (:math:`\dot{q}`) affected by a random uniform scale for generalization, and the target's position in space (:math:`target_{_{XYZ}}`) with respect to the robot's base + + * The action space, bounded in the range -1 to 1, consists of the following. For the Cartesian control it's the end-effector's position (:math:`ee_{_{XYZ}}`) scaled change. For the joint control it's robot joints' position scaled change. The end-effector's position corresponds to the gripper fingers, which remain closed all the time + + * The instantaneous reward is the negative value of the Euclidean distance (:math:`\text{d}`) between the robot end-effector and the target point position. The episode terminates when this distance is less than 0.035 meters in simulation (0.075 meters in real-world) or when the defined maximum timestep is reached + + * The target position lies within a rectangular cuboid of dimensions 0.5 x 0.5 x 0.2 meters centered at 0.5, 0.0, 0.2 meters with respect to the robot's base. The robot joints' positions are drawn from an initial configuration [0º, -45º, 0º, -135º, 0º, 90º, 45º] modified with uniform random values between -7º and 7º approximately + + .. list-table:: + :header-rows: 1 + + * - Variable + - Formula / value + - Size + * - Observation space + - :math:`\dfrac{t}{t_{max}},\; 2 \dfrac{q - q_{min}}{q_{max} - q_{min}} - 1,\; 0.1\,\dot{q}\,U(0.5,1.5),\; target_{_{XYZ}}` + - 18 + * - Action space (joint) + - :math:`\dfrac{2.5}{120} \, \Delta q` + - 7 + * - Action space (Cartesian) + - :math:`\dfrac{1}{100} \, \Delta ee_{_{XYZ}}` + - 3 + * - Reward + - :math:`-\text{d}(ee_{_{XYZ}},\; target_{_{XYZ}})` + - + * - Episode termination + - :math:`\text{d}(ee_{_{XYZ}},\; target_{_{XYZ}}) \le 0.035 \quad` or :math:`\quad t \ge t_{max} - 1` + - + * - Maximum timesteps (:math:`t_{max}`) + - 100 + - + + .. raw:: html + +
+ + **Workflows** + + .. tabs:: + + .. tab:: Simulation + + .. raw:: html + + + + .. raw:: html + + + + | + + **Prerequisites:** + + All installation steps described in Omniverse Isaac Gym's `Overview & Getting Started `_ section must be fulfilled (especially the subsection 1.3. Installing Examples Repository) + + **Files** (the implementation is self-contained so no specific location is required): + + * Environment: :download:`reaching_franka_sim_env.py <../examples/real_world/franka_emika_panda/reaching_franka_sim_env.py>` + * Training script: :download:`reaching_franka_sim_skrl_train.py <../examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_train.py>` + * Evaluation script: :download:`reaching_franka_sim_skrl_eval.py <../examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_eval.py>` + + **Training and evaluation:** + + .. code-block:: bash + + # training (local workstation) + ~/.local/share/ov/pkg/isaac_sim-*/python.sh reaching_franka_sim_skrl_train.py + + # training (docker container) + /isaac-sim/python.sh reaching_franka_sim_skrl_train.py + + .. code-block:: bash + + # evaluation (local workstation) + ~/.local/share/ov/pkg/isaac_sim-*/python.sh reaching_franka_sim_skrl_eval.py + + # evaluation (docker container) + /isaac-sim/python.sh reaching_franka_sim_skrl_eval.py + + **Main environment configuration:** + + The control space (Cartesian or joint) can be specified in the task configuration dictionary (from :literal:`reaching_franka_sim_skrl_train.py`) as follow: + + .. code-block:: python + + TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + + .. tab:: Real-world + + .. warning:: + + Make sure you have the e-stop on hand in case something goes wrong in the run. **Control via RL can be dangerous and unsafe for both the operator and the robot** + + .. raw:: html + + + Target position entered via the command prompt or generated randomly +

+ + Target position in X and Y obtained with a USB-camera (position in Z fixed at 0.2 m) + + | + + **Prerequisites:** + + A physical Franka robot with `Franka Control Interface (FCI) `_ is required. Additionally, the frankx library must be available in the python environment (see `frankx's pull request #42 `_ for the RL-compatible version installation) + + **Files** + + * Environment: :download:`reaching_franka_real_env.py <../examples/real_world/franka_emika_panda/reaching_franka_real_env.py>` + * Evaluation script: :download:`reaching_franka_real_skrl_eval.py <../examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py>` + + **Evaluation:** + + .. code-block:: bash + + python3 reaching_franka_real_skrl_eval.py + + **Main environment configuration:** + + The control space (Cartesian or joint), the robot motion type (waypoint or impedance) and the target position acquisition (command prompt / automatically generated or USB-camera) can be specified in the environment class constructor (from :literal:`reaching_franka_real_skrl_eval.py`) as follow: + + .. code-block:: python + + control_space = "joint" # joint or cartesian + motion_type = "waypoint" # waypoint or impedance + camera_tracking = False # True for USB-camera tracking + .. _library_utilities: Library utilities (skrl.utils module) From 8ff907c2188935bbea10f694272c53ffa00360a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 19 Sep 2022 22:24:45 +0200 Subject: [PATCH 084/108] Add Omniverse Isaac Gym utilities --- skrl/utils/omniverse_isaacgym_utils.py | 265 +++++++++++++++++++++++++ 1 file changed, 265 insertions(+) create mode 100644 skrl/utils/omniverse_isaacgym_utils.py diff --git a/skrl/utils/omniverse_isaacgym_utils.py b/skrl/utils/omniverse_isaacgym_utils.py new file mode 100644 index 00000000..42d5da67 --- /dev/null +++ b/skrl/utils/omniverse_isaacgym_utils.py @@ -0,0 +1,265 @@ +from typing import Optional, Union + +import torch +import numpy as np + + +def _np_quat_mul(a, b): + assert a.shape == b.shape + shape = a.shape + a = a.reshape(-1, 4) + b = b.reshape(-1, 4) + + x1, y1, z1, w1 = a[:, 0], a[:, 1], a[:, 2], a[:, 3] + x2, y2, z2, w2 = b[:, 0], b[:, 1], b[:, 2], b[:, 3] + ww = (z1 + x1) * (x2 + y2) + yy = (w1 - y1) * (w2 + z2) + zz = (w1 + y1) * (w2 - z2) + xx = ww + yy + zz + qq = 0.5 * (xx + (z1 - x1) * (x2 - y2)) + w = qq - ww + (z1 - y1) * (y2 - z2) + x = qq - xx + (x1 + w1) * (x2 + w2) + y = qq - yy + (w1 - x1) * (y2 + z2) + z = qq - zz + (z1 + y1) * (w2 - x2) + + return np.stack([x, y, z, w], axis=-1).reshape(shape) + +def _np_quat_conjugate(a): + shape = a.shape + a = a.reshape(-1, 4) + return np.concatenate((-a[:, :3], a[:, -1:]), axis=-1).reshape(shape) + +def _torch_quat_mul(a, b): + assert a.shape == b.shape + shape = a.shape + a = a.reshape(-1, 4) + b = b.reshape(-1, 4) + + w1, x1, y1, z1 = a[:, 0], a[:, 1], a[:, 2], a[:, 3] + w2, x2, y2, z2 = b[:, 0], b[:, 1], b[:, 2], b[:, 3] + ww = (z1 + x1) * (x2 + y2) + yy = (w1 - y1) * (w2 + z2) + zz = (w1 + y1) * (w2 - z2) + xx = ww + yy + zz + qq = 0.5 * (xx + (z1 - x1) * (x2 - y2)) + w = qq - ww + (z1 - y1) * (y2 - z2) + x = qq - xx + (x1 + w1) * (x2 + w2) + y = qq - yy + (w1 - x1) * (y2 + z2) + z = qq - zz + (z1 + y1) * (w2 - x2) + + return torch.stack([w, x, y, z], dim=-1).view(shape) + +def _torch_quat_conjugate(a): # wxyz + shape = a.shape + a = a.reshape(-1, 4) + return torch.cat((a[:, :1], -a[:, 1:]), dim=-1).view(shape) + +def ik(jacobian_end_effector: torch.Tensor, + current_position: torch.Tensor, + 
current_orientation: torch.Tensor, + goal_position: torch.Tensor, + goal_orientation: Optional[torch.Tensor] = None, + damping_factor: float = 0.05, + squeeze_output: bool = True) -> torch.Tensor: + """Inverse kinematics using damped least squares method + + :param jacobian_end_effector: End effector's jacobian + :type jacobian_end_effector: torch.Tensor + :param current_position: End effector's current position + :type current_position: torch.Tensor + :param current_orientation: End effector's current orientation + :type current_orientation: torch.Tensor + :param goal_position: End effector's goal position + :type goal_position: torch.Tensor + :param goal_orientation: End effector's goal orientation (default: ``None``) + :type goal_orientation: torch.Tensor, optional + :param damping_factor: Damping factor (default: ``0.05``) + :type damping_factor: float, optional + :param squeeze_output: Squeeze output (default: ``True``) + :type squeeze_output: bool, optional + + :return: Change in joint angles + :rtype: torch.Tensor + """ + if goal_orientation is None: + goal_orientation = current_orientation + + # torch + if isinstance(jacobian_end_effector, torch.Tensor): + # compute error + + q = _torch_quat_mul(goal_orientation, _torch_quat_conjugate(current_orientation)) + error = torch.cat([goal_position - current_position, # position error + q[:, 1:] * torch.sign(q[:, 0]).unsqueeze(-1)], # orientation error + dim=-1).unsqueeze(-1) + + # solve damped least squares (dO = J.T * V) + transpose = torch.transpose(jacobian_end_effector, 1, 2) + lmbda = torch.eye(6, device=jacobian_end_effector.device) * (damping_factor ** 2) + if squeeze_output: + return (transpose @ torch.inverse(jacobian_end_effector @ transpose + lmbda) @ error).squeeze(dim=2) + else: + return transpose @ torch.inverse(jacobian_end_effector @ transpose + lmbda) @ error + + # numpy + # TODO: test and fix this + else: + # compute error + q = _np_quat_mul(goal_orientation, _np_quat_conjugate(current_orientation)) + error = np.concatenate([goal_position - current_position, # position error + q[:, 0:3] * np.sign(q[:, 3])]) # orientation error + + # solve damped least squares (dO = J.T * V) + transpose = np.transpose(jacobian_end_effector, 1, 2) + lmbda = np.eye(6) * (damping_factor ** 2) + if squeeze_output: + return (transpose @ np.linalg.inv(jacobian_end_effector @ transpose + lmbda) @ error) + else: + return transpose @ np.linalg.inv(jacobian_end_effector @ transpose + lmbda) @ error + +def get_env_instance(headless: bool = True, multi_threaded: bool = False) -> "omni.isaac.gym.vec_env.VecEnvBase": + """ + Instantiate a VecEnvBase-based object compatible with OmniIsaacGymEnvs + + :param headless: Disable UI when running (default: ``True``) + :type headless: bool, optional + :param multi_threaded: Whether to return a multi-threaded environment instance (default: ``False``) + :type multi_threaded: bool, optional + + :return: Environment instance + :rtype: omni.isaac.gym.vec_env.VecEnvBase + + Example:: + + from skrl.envs.torch import wrap_env + from skrl.utils.omniverse_isaacgym_utils import get_env_instance + + # get environment instance + env = get_env_instance(headless=True) + + # parse sim configuration + from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig + sim_config = SimConfig({"test": False, + "device_id": 0, + "headless": True, + "sim_device": "gpu", + "task": {"name": "CustomTask", + "physics_engine": "physx", + "env": {"numEnvs": 512, + "envSpacing": 1.5, + "enableDebugVis": False, + "clipObservations": 
1000.0, + "clipActions": 1.0, + "controlFrequencyInv": 4}, + "sim": {"dt": 0.0083, # 1 / 120 + "use_gpu_pipeline": True, + "gravity": [0.0, 0.0, -9.81], + "add_ground_plane": True, + "use_flatcache": True, + "enable_scene_query_support": False, + "enable_cameras": False, + "default_physics_material": {"static_friction": 1.0, + "dynamic_friction": 1.0, + "restitution": 0.0}, + "physx": {"worker_thread_count": 4, + "solver_type": 1, + "use_gpu": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "contact_offset": 0.005, + "rest_offset": 0.0, + "bounce_threshold_velocity": 0.2, + "friction_offset_threshold": 0.04, + "friction_correlation_distance": 0.025, + "enable_sleeping": True, + "enable_stabilization": True, + "max_depenetration_velocity": 1000.0, + "gpu_max_rigid_contact_count": 524288, + "gpu_max_rigid_patch_count": 33554432, + "gpu_found_lost_pairs_capacity": 524288, + "gpu_found_lost_aggregate_pairs_capacity": 262144, + "gpu_total_aggregate_pairs_capacity": 1048576, + "gpu_max_soft_body_contacts": 1048576, + "gpu_max_particle_contacts": 1048576, + "gpu_heap_capacity": 33554432, + "gpu_temp_buffer_capacity": 16777216, + "gpu_max_num_partitions": 8}}}}) + + # import and setup custom task + from custom_task import CustomTask + task = CustomTask(name="CustomTask", sim_config=sim_config, env=env) + env.set_task(task=task, sim_params=sim_config.get_physics_params(), backend="torch", init_sim=True) + + # wrap the environment + env = wrap_env(env, "omniverse-isaacgym") + """ + from omni.isaac.gym.vec_env import VecEnvBase, VecEnvMT, TaskStopException + from omni.isaac.gym.vec_env.vec_env_mt import TrainerMT + + class _OmniIsaacGymVecEnv(VecEnvBase): + def step(self, actions): + actions = torch.clamp(actions, -self._task.clip_actions, self._task.clip_actions).to(self._task.device).clone() + self._task.pre_physics_step(actions) + + for _ in range(self._task.control_frequency_inv): + self._world.step(render=self._render) + self.sim_frame_count += 1 + + observations, rewards, dones, info = self._task.post_physics_step() + + return {"obs": torch.clamp(observations, -self._task.clip_obs, self._task.clip_obs).to(self._task.rl_device).clone()}, \ + rewards.to(self._task.rl_device).clone(), dones.to(self._task.rl_device).clone(), info.copy() + + def reset(self): + self._task.reset() + actions = torch.zeros((self.num_envs, self._task.num_actions), device=self._task.device) + return self.step(actions)[0] + + class _OmniIsaacGymTrainerMT(TrainerMT): + def run(self): + pass + + def stop(self): + pass + + class _OmniIsaacGymVecEnvMT(VecEnvMT): + def __init__(self, headless): + super().__init__(headless) + + self.action_queue = queue.Queue(1) + self.data_queue = queue.Queue(1) + + def run(self, trainer=None): + super().run(_OmniIsaacGymTrainerMT() if trainer is None else trainer) + + def _parse_data(self, data): + self._observations = torch.clamp(data["obs"], -self._task.clip_obs, self._task.clip_obs).to(self._task.rl_device).clone() + self._rewards = data["rew"].to(self._task.rl_device).clone() + self._dones = data["reset"].to(self._task.rl_device).clone() + self._info = data["extras"].copy() + + def step(self, actions): + if self._stop: + raise TaskStopException() + + actions = torch.clamp(actions, -self._task.clip_actions, self._task.clip_actions).clone() + + self.send_actions(actions) + data = self.get_data() + + return {"obs": self._observations}, self._rewards, self._dones, self._info + + def reset(self): + self._task.reset() + actions = 
torch.zeros((self.num_envs, self._task.num_actions), device=self._task.device) + return self.step(actions)[0] + + def close(self): + # end stop signal to main thread + self.send_actions(None) + self.stop = True + + if multi_threaded: + return _OmniIsaacGymVecEnvMT(headless=headless) + else: + return _OmniIsaacGymVecEnv(headless=headless) From aba1edf47ca4c902f24c6439778aa8923fc8e739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 19 Sep 2022 22:26:30 +0200 Subject: [PATCH 085/108] Add Omniverse Isaac Gym utilities to docs --- docs/source/index.rst | 2 ++ .../skrl.utils.omniverse_isaacgym_utils.rst | 20 +++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 0569fb7c..d48d8ce9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -198,6 +198,7 @@ Utils * :doc:`Model instantiators ` * Memory and Tensorboard :doc:`file post-processing ` * :doc:`Isaac Gym utils ` + * :doc:`Omniverse Isaac Gym utils ` .. toctree:: :maxdepth: 1 @@ -208,3 +209,4 @@ Utils modules/skrl.utils.model_instantiators modules/skrl.utils.postprocessing modules/skrl.utils.isaacgym_utils + modules/skrl.utils.omniverse_isaacgym_utils diff --git a/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst b/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst index 806ed6d8..8b4102e5 100644 --- a/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst +++ b/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst @@ -2,13 +2,13 @@ Omniverse Isaac Gym utils ========================= .. contents:: Table of Contents - :depth: 2 - :local: - :backlinks: none + :depth: 2 + :local: + :backlinks: none .. raw:: html -
+
Control of robotic manipulators ------------------------------- @@ -31,3 +31,15 @@ API """ .. autofunction:: skrl.utils.omniverse_isaacgym_utils.ik + +.. raw:: html + +
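+A minimal usage sketch is shown below. The batch size and tensor shapes are assumptions for a
+7-DOF manipulator, and the identity quaternion follows the (w, x, y, z) convention used by this
+module; when ``goal_orientation`` is ``None`` the current orientation is kept as the goal
+
+.. code-block:: python
+
+    import torch
+    from skrl.utils.omniverse_isaacgym_utils import ik
+
+    num_envs = 1024                                        # assumed number of parallel environments
+    jacobian_end_effector = torch.zeros((num_envs, 6, 7))  # end-effector Jacobian (from the simulation)
+    current_position = torch.zeros((num_envs, 3))          # current end-effector position
+    current_orientation = torch.tensor([[1.0, 0.0, 0.0, 0.0]]).repeat(num_envs, 1)  # current orientation (w, x, y, z)
+    goal_position = torch.rand((num_envs, 3))              # target position
+
+    # change in joint angles (shape: [num_envs, 7]) that moves the end-effector towards the goal
+    delta_dof_pos = ik(jacobian_end_effector=jacobian_end_effector,
+                       current_position=current_position,
+                       current_orientation=current_orientation,
+                       goal_position=goal_position,
+                       goal_orientation=None)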
+ +OmniIsaacGymEnvs-like environment instance +------------------------------------------ + +API +""" + +.. autofunction:: skrl.utils.omniverse_isaacgym_utils.get_env_instance \ No newline at end of file From bf99a34ecf020f1ee46b4d8c6c63d5916a8d30b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 19 Sep 2022 22:31:41 +0200 Subject: [PATCH 086/108] Add a download link to franka emika example trained checkpoints --- docs/source/intro/examples.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 0a365f44..968711fb 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -849,6 +849,7 @@ These examples show basic real-world use cases to guide and support advanced RL * Environment: :download:`reaching_franka_sim_env.py <../examples/real_world/franka_emika_panda/reaching_franka_sim_env.py>` * Training script: :download:`reaching_franka_sim_skrl_train.py <../examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_train.py>` * Evaluation script: :download:`reaching_franka_sim_skrl_eval.py <../examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_eval.py>` + * Checkpoints (:literal:`agent_joint.pt`, :literal:`agent_cartesian.pt`): :download:`trained_checkpoints.zip ` **Training and evaluation:** @@ -904,6 +905,7 @@ These examples show basic real-world use cases to guide and support advanced RL * Environment: :download:`reaching_franka_real_env.py <../examples/real_world/franka_emika_panda/reaching_franka_real_env.py>` * Evaluation script: :download:`reaching_franka_real_skrl_eval.py <../examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py>` + * Checkpoints (:literal:`agent_joint.pt`, :literal:`agent_cartesian.pt`): :download:`trained_checkpoints.zip ` **Evaluation:** From 55b9ddf5d1f69b8744b46b3cfb5983ffdd582497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 19 Sep 2022 23:31:09 +0200 Subject: [PATCH 087/108] Update frankx pull request link --- docs/source/intro/examples.rst | 96 +++++++++++++++++----------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 968711fb..0b4c5b88 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -777,7 +777,7 @@ These examples show basic real-world use cases to guide and support advanced RL .. tab:: Franka Emika Panda - **3D reaching task (Franka's gripper must reach a certain target point in space)**. The training was done in Omniverse Isaac Gym. The real robot control is performed through the Python API of a modified version of frankx (see `frankx's pull request #42 `_), a high-level motion library around libfranka. Training and evaluation is performed for both Cartesian and joint control space + **3D reaching task (Franka's gripper must reach a certain target point in space)**. The training was done in Omniverse Isaac Gym. The real robot control is performed through the Python API of a modified version of frankx (see `frankx's pull request #44 `_), a high-level motion library around libfranka. Training and evaluation is performed for both Cartesian and joint control space .. 
raw:: html @@ -787,7 +787,7 @@ These examples show basic real-world use cases to guide and support advanced RL * The observation space is composed of the episode's normalized progress, the robot joints' normalized positions (:math:`q`) in the interval -1 to 1, the robot joints' velocities (:math:`\dot{q}`) affected by a random uniform scale for generalization, and the target's position in space (:math:`target_{_{XYZ}}`) with respect to the robot's base - * The action space, bounded in the range -1 to 1, consists of the following. For the Cartesian control it's the end-effector's position (:math:`ee_{_{XYZ}}`) scaled change. For the joint control it's robot joints' position scaled change. The end-effector's position corresponds to the gripper fingers, which remain closed all the time + * The action space, bounded in the range -1 to 1, consists of the following. For the joint control it's robot joints' position scaled change. For the Cartesian control it's the end-effector's position (:math:`ee_{_{XYZ}}`) scaled change. The end-effector position frame corresponds to the point where the left finger connects to the gripper base in simulation, whereas in the real world it corresponds to the end of the fingers. The gripper fingers remain closed all the time in both cases * The instantaneous reward is the negative value of the Euclidean distance (:math:`\text{d}`) between the robot end-effector and the target point position. The episode terminates when this distance is less than 0.035 meters in simulation (0.075 meters in real-world) or when the defined maximum timestep is reached @@ -826,6 +826,52 @@ These examples show basic real-world use cases to guide and support advanced RL .. tabs:: + .. tab:: Real-world + + .. warning:: + + Make sure you have the e-stop on hand in case something goes wrong in the run. **Control via RL can be dangerous and unsafe for both the operator and the robot** + + .. raw:: html + + + Target position entered via the command prompt or generated randomly +

+ + Target position in X and Y obtained with a USB-camera (position in Z fixed at 0.2 m) + + | + + **Prerequisites:** + + A physical Franka robot with `Franka Control Interface (FCI) `_ is required. Additionally, the frankx library must be available in the python environment (see `frankx's pull request #44 `_ for the RL-compatible version installation) + + **Files** + + * Environment: :download:`reaching_franka_real_env.py <../examples/real_world/franka_emika_panda/reaching_franka_real_env.py>` + * Evaluation script: :download:`reaching_franka_real_skrl_eval.py <../examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py>` + * Checkpoints (:literal:`agent_joint.pt`, :literal:`agent_cartesian.pt`): :download:`trained_checkpoints.zip ` + + **Evaluation:** + + .. code-block:: bash + + python3 reaching_franka_real_skrl_eval.py + + **Main environment configuration:** + + The control space (Cartesian or joint), the robot motion type (waypoint or impedance) and the target position acquisition (command prompt / automatically generated or USB-camera) can be specified in the environment class constructor (from :literal:`reaching_franka_real_skrl_eval.py`) as follow: + + .. code-block:: python + + control_space = "joint" # joint or cartesian + motion_type = "waypoint" # waypoint or impedance + camera_tracking = False # True for USB-camera tracking + .. tab:: Simulation .. raw:: html @@ -877,52 +923,6 @@ These examples show basic real-world use cases to guide and support advanced RL TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian" - .. tab:: Real-world - - .. warning:: - - Make sure you have the e-stop on hand in case something goes wrong in the run. **Control via RL can be dangerous and unsafe for both the operator and the robot** - - .. raw:: html - - - Target position entered via the command prompt or generated randomly -

- - Target position in X and Y obtained with a USB-camera (position in Z fixed at 0.2 m) - - | - - **Prerequisites:** - - A physical Franka robot with `Franka Control Interface (FCI) `_ is required. Additionally, the frankx library must be available in the python environment (see `frankx's pull request #42 `_ for the RL-compatible version installation) - - **Files** - - * Environment: :download:`reaching_franka_real_env.py <../examples/real_world/franka_emika_panda/reaching_franka_real_env.py>` - * Evaluation script: :download:`reaching_franka_real_skrl_eval.py <../examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py>` - * Checkpoints (:literal:`agent_joint.pt`, :literal:`agent_cartesian.pt`): :download:`trained_checkpoints.zip ` - - **Evaluation:** - - .. code-block:: bash - - python3 reaching_franka_real_skrl_eval.py - - **Main environment configuration:** - - The control space (Cartesian or joint), the robot motion type (waypoint or impedance) and the target position acquisition (command prompt / automatically generated or USB-camera) can be specified in the environment class constructor (from :literal:`reaching_franka_real_skrl_eval.py`) as follow: - - .. code-block:: python - - control_space = "joint" # joint or cartesian - motion_type = "waypoint" # waypoint or impedance - camera_tracking = False # True for USB-camera tracking - .. _library_utilities: Library utilities (skrl.utils module) From 4afc188cef54c4cfc265849321960e3793ba0586 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 22 Sep 2022 00:21:22 +0200 Subject: [PATCH 088/108] Generate random seed and enable deterministic --- skrl/utils/__init__.py | 66 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/skrl/utils/__init__.py b/skrl/utils/__init__.py index 077e31a9..778ef728 100644 --- a/skrl/utils/__init__.py +++ b/skrl/utils/__init__.py @@ -1,12 +1,22 @@ +from typing import Optional + +import os +import sys +import time +import torch import random import numpy as np -import torch + +from skrl import logger -def set_seed(seed: int) -> None: +def set_seed(seed: Optional[int] = None, deterministic: bool = False) -> int: """ Set the seed for the random number generators + Due to NumPy's legacy seeding constraint the seed must be between 0 and 2**32 - 1. + Otherwise a NumPy exception (``ValueError: Seed must be between 0 and 2**32 - 1``) will be raised + Modified packages: - random @@ -15,15 +25,61 @@ def set_seed(seed: int) -> None: Example:: + # fixed seed >>> from skrl.utils import set_seed >>> set_seed(42) + [skrl:INFO] Seed: 42 + 42 + + # random seed + >>> set_seed() + >>> from skrl.utils import set_seed + [skrl:INFO] Seed: 1776118066 + 1776118066 + + # enable deterministic. The following environment variables should be established: + # - CUDA 10.1: CUDA_LAUNCH_BLOCKING=1 + # - CUDA 10.2 or later: CUBLAS_WORKSPACE_CONFIG=:16:8 or CUBLAS_WORKSPACE_CONFIG=:4096:2 + >>> from skrl.utils import set_seed + >>> set_seed(42, deterministic=True) + [skrl:INFO] Seed: 42 + [skrl:WARNING] PyTorch/cuDNN deterministic algorithms are enabled. This may affect performance + 42 + + :param seed: The seed to set. Is None, a random seed will be generated (default: ``None``) + :type seed: int, optional + :param deterministic: Whether PyTorch is configured to use deterministic algorithms (default: ``False``). 
+ The following environment variables should be established for CUDA 10.1 (``CUDA_LAUNCH_BLOCKING=1``) + and for CUDA 10.2 or later (``CUBLAS_WORKSPACE_CONFIG=:16:8`` or ``CUBLAS_WORKSPACE_CONFIG=:4096:2``). + See PyTorch `Reproducibility `_ for details + :type deterministic: bool, optional - :param seed: The seed to set - :type seed: int + :return: Seed + :rtype: int """ + # generate a random seed + if seed is None: + try: + seed = int.from_bytes(os.urandom(4), byteorder=sys.byteorder) + except NotImplementedError: + seed = int(time.time() * 1000) + seed %= 2 ** 31 # NumPy's legacy seeding seed must be between 0 and 2**32 - 1 + random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) - \ No newline at end of file + + logger.info("Seed: {}".format(seed)) + + if deterministic: + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + + # On CUDA 10.1, set environment variable CUDA_LAUNCH_BLOCKING=1 + # On CUDA 10.2 or later, set environment variable CUBLAS_WORKSPACE_CONFIG=:16:8 or CUBLAS_WORKSPACE_CONFIG=:4096:2 + + logger.warning("PyTorch/cuDNN deterministic algorithms are enabled. This may affect performance") + + return seed From 7f91d4cb203545b871ccc161d61deb07f7a38b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 22 Sep 2022 21:58:27 +0200 Subject: [PATCH 089/108] Add reference link to resources in docs --- docs/source/modules/skrl.resources.noises.rst | 2 ++ docs/source/modules/skrl.resources.preprocessors.rst | 2 ++ docs/source/modules/skrl.resources.schedulers.rst | 2 ++ 3 files changed, 6 insertions(+) diff --git a/docs/source/modules/skrl.resources.noises.rst b/docs/source/modules/skrl.resources.noises.rst index a988f0fe..e25c87c4 100644 --- a/docs/source/modules/skrl.resources.noises.rst +++ b/docs/source/modules/skrl.resources.noises.rst @@ -1,3 +1,5 @@ +.. _resources_noises: + Noises ====== diff --git a/docs/source/modules/skrl.resources.preprocessors.rst b/docs/source/modules/skrl.resources.preprocessors.rst index d44aa631..21c44a10 100644 --- a/docs/source/modules/skrl.resources.preprocessors.rst +++ b/docs/source/modules/skrl.resources.preprocessors.rst @@ -1,3 +1,5 @@ +.. _resources_preprocessors: + Preprocessors ============= diff --git a/docs/source/modules/skrl.resources.schedulers.rst b/docs/source/modules/skrl.resources.schedulers.rst index e360b730..a879dae6 100644 --- a/docs/source/modules/skrl.resources.schedulers.rst +++ b/docs/source/modules/skrl.resources.schedulers.rst @@ -1,3 +1,5 @@ +.. _resources_schedulers: + Learning rate schedulers ======================== From d8a23abebb07d5e1cccf229585e59e581232a8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 22 Sep 2022 21:59:30 +0200 Subject: [PATCH 090/108] Update example code in model's constructor docstring --- skrl/models/torch/gaussian.py | 4 ++-- skrl/models/torch/multivariate_gaussian.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index c56fce15..53fa04b9 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -42,9 +42,9 @@ def __init__(self, >>> >>> class Policy(GaussianMixin, Model): ... def __init__(self, observation_space, action_space, device="cuda:0", - ... clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): + ... 
clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): ... Model.__init__(self, observation_space, action_space, device) - ... GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + ... GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) ... ... self.net = nn.Sequential(nn.Linear(self.num_observations, 32), ... nn.ELU(), diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index b2b85449..4ca63e26 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -34,8 +34,8 @@ def __init__(self, >>> from skrl.models.torch import Model, MultivariateGaussianMixin >>> >>> class Policy(MultivariateGaussianMixin, Model): - ... def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False, - ... clip_log_std=True, min_log_std=-20, max_log_std=2): + ... def __init__(self, observation_space, action_space, device="cuda:0", + ... clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): ... Model.__init__(self, observation_space, action_space, device) ... MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) ... From 1396f5b8b543e2fd5ba6d8628620bdc6752508b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 30 Sep 2022 09:53:29 +0200 Subject: [PATCH 091/108] Update Omniverse Isaac Gym examples --- .../examples/omniisaacgym/ppo_allegro_hand.py | 65 ++++------ docs/source/examples/omniisaacgym/ppo_ant.py | 51 +++----- .../examples/omniisaacgym/ppo_ant_mt.py | 51 +++----- .../examples/omniisaacgym/ppo_anymal.py | 115 +++++++++++++++++ .../omniisaacgym/ppo_anymal_terrain.py | 120 ++++++++++++++++++ .../examples/omniisaacgym/ppo_ball_balance.py | 115 +++++++++++++++++ .../examples/omniisaacgym/ppo_cartpole.py | 49 +++---- .../examples/omniisaacgym/ppo_cartpole_mt.py | 49 +++---- .../examples/omniisaacgym/ppo_crazy_flie.py | 115 +++++++++++++++++ .../omniisaacgym/ppo_franka_cabinet.py | 115 +++++++++++++++++ .../examples/omniisaacgym/ppo_humanoid.py | 51 +++----- .../examples/omniisaacgym/ppo_ingenuity.py | 115 +++++++++++++++++ .../examples/omniisaacgym/ppo_quadcopter.py | 115 +++++++++++++++++ .../examples/omniisaacgym/ppo_shadow_hand.py | 67 ++++------ 14 files changed, 971 insertions(+), 222 deletions(-) create mode 100644 docs/source/examples/omniisaacgym/ppo_anymal.py create mode 100644 docs/source/examples/omniisaacgym/ppo_anymal_terrain.py create mode 100644 docs/source/examples/omniisaacgym/ppo_ball_balance.py create mode 100644 docs/source/examples/omniisaacgym/ppo_crazy_flie.py create mode 100644 docs/source/examples/omniisaacgym/ppo_franka_cabinet.py create mode 100644 docs/source/examples/omniisaacgym/ppo_ingenuity.py create mode 100644 docs/source/examples/omniisaacgym/ppo_quadcopter.py diff --git a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py index 8b3f589f..b83b0933 100644 --- a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py @@ -17,42 +17,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. 
-# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. +class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), nn.Linear(512, 256), nn.ELU(), nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 512), - nn.ELU(), - nn.Linear(512, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Omniverse Isaac Gym environment @@ -63,28 +58,24 @@ def compute(self, states, taken_actions, role): # Instantiate a RandomMemory as rollout buffer (any memory can be used for this) -memory = RandomMemory(memory_size=8, num_envs=env.num_envs, device=device) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) # Instantiate the agent's models (function approximators). # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. 
# Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 8 # memory_size +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 5 -cfg_ppo["mini_batches"] = 4 # 8 * 16384 / 32768 +cfg_ppo["mini_batches"] = 4 # 16 * 8192 / 32768 cfg_ppo["discount_factor"] = 0.99 cfg_ppo["lambda"] = 0.95 cfg_ppo["learning_rate"] = 5e-3 @@ -104,9 +95,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} cfg_ppo["value_preprocessor"] = RunningStandardScaler cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} -# logging to TensorBoard and write checkpoints each 200 and 2000 timesteps respectively -cfg_ppo["experiment"]["write_interval"] = 200 -cfg_ppo["experiment"]["checkpoint_interval"] = 2000 +# logging to TensorBoard and write checkpoints each 800 and 8000 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 800 +cfg_ppo["experiment"]["checkpoint_interval"] = 8000 agent = PPO(models=models_ppo, memory=memory, @@ -117,7 +108,7 @@ def compute(self, states, taken_actions, role): # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 40000, "headless": True} +cfg_trainer = {"timesteps": 160000, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training diff --git a/docs/source/examples/omniisaacgym/ppo_ant.py b/docs/source/examples/omniisaacgym/ppo_ant.py index e6482534..4282b76d 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant.py +++ b/docs/source/examples/omniisaacgym/ppo_ant.py @@ -17,42 +17,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), nn.Linear(256, 128), nn.ELU(), nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(64, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Omniverse Isaac Gym environment @@ -70,12 +65,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/omniisaacgym/ppo_ant_mt.py b/docs/source/examples/omniisaacgym/ppo_ant_mt.py index 6072cfc0..d3bdbc50 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_ant_mt.py @@ -19,42 +19,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), nn.Linear(256, 128), nn.ELU(), nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(64, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Omniverse Isaac Gym environment @@ -72,12 +67,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. 
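The shared policy/value model that replaces the separate Policy and Value classes in these examples hinges on the role argument: the same instance is registered under both the "policy" and "value" keys, and each call is dispatched to GaussianMixin or DeterministicMixin accordingly. A minimal usage sketch, assuming env, device and models_ppo are set up exactly as in the scripts above (the dummy observation batch and the variable names are only for illustration; PPO performs these calls itself during rollouts and updates):

import torch

model = models_ppo["policy"]  # same object as models_ppo["value"]
dummy_states = torch.rand((env.num_envs, *env.observation_space.shape), device=device)

policy_output = model.act(dummy_states, None, role="policy")  # dispatched to GaussianMixin.act: sampled actions, log-probabilities, ...
value_output = model.act(dummy_states, None, role="value")    # dispatched to DeterministicMixin.act: state-value estimates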
diff --git a/docs/source/examples/omniisaacgym/ppo_anymal.py b/docs/source/examples/omniisaacgym/ppo_anymal.py new file mode 100644 index 00000000..72505e7e --- /dev/null +++ b/docs/source/examples/omniisaacgym/ppo_anymal.py @@ -0,0 +1,115 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env +from skrl.envs.torch import load_omniverse_isaacgym_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the shared model (stochastic and deterministic models) for the agent using mixins. +class Shared(GaussianMixin, DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU()) + + self.mean_layer = nn.Linear(64, self.num_actions) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(64, 1) + + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) + + def compute(self, states, taken_actions, role): + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) + + +# Load and wrap the Omniverse Isaac Gym environment +env = load_omniverse_isaacgym_env(task_name="Anymal") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=24, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 24 # memory_size +cfg_ppo["learning_epochs"] = 5 +cfg_ppo["mini_batches"] = 3 # 24 * 4096 / 32768 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 3e-4 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 1.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["rewards_shaper"] = None +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 120 and 1200 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 120 +cfg_ppo["experiment"]["checkpoint_interval"] = 1200 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 24000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() diff --git a/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py b/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py new file mode 100644 index 00000000..5b6aaf80 --- /dev/null +++ b/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py @@ -0,0 +1,120 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env +from skrl.envs.torch import load_omniverse_isaacgym_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 512), + nn.ELU(), + nn.Linear(512, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 512), + nn.ELU(), + nn.Linear(512, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 1)) + + def compute(self, states, taken_actions, role): + return self.net(states) + + +# Load and wrap the Omniverse Isaac Gym environment +env = load_omniverse_isaacgym_env(task_name="AnymalTerrain") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=48, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 48 # memory_size +cfg_ppo["learning_epochs"] = 5 +cfg_ppo["mini_batches"] = 6 # 48 * 2048 / 16384 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 3e-4 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.001 +cfg_ppo["value_loss_scale"] = 1.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["rewards_shaper"] = None +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 480 and 4800 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 480 +cfg_ppo["experiment"]["checkpoint_interval"] = 4800 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 96000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() diff --git a/docs/source/examples/omniisaacgym/ppo_ball_balance.py b/docs/source/examples/omniisaacgym/ppo_ball_balance.py new file mode 100644 index 00000000..f63dbe3a --- /dev/null +++ b/docs/source/examples/omniisaacgym/ppo_ball_balance.py @@ -0,0 +1,115 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env +from skrl.envs.torch import load_omniverse_isaacgym_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, 32), + nn.ELU()) + + self.mean_layer = nn.Linear(32, self.num_actions) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(32, 1) + + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) + + def compute(self, states, taken_actions, role): + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) + + +# Load and wrap the Omniverse Isaac Gym environment +env = load_omniverse_isaacgym_env(task_name="BallBalance") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model + + +# Configure and instantiate the agent. 
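# A short note on the preprocessor entries in the configuration below: "state_preprocessor" and
# "value_preprocessor" take a class, and the agent instantiates it with the matching *_kwargs.
# Rough standalone equivalent of that construction (illustrative only; PPO builds and updates
# these scalers internally):
state_scaler = RunningStandardScaler(size=env.observation_space, device=device)  # running mean/std over observations
value_scaler = RunningStandardScaler(size=1, device=device)                      # standardizes the 1-dimensional value estimates/returns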
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 16 # memory_size +cfg_ppo["learning_epochs"] = 8 +cfg_ppo["mini_batches"] = 8 # 16 * 4096 / 8192 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 3e-4 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 2.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.1 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 20 and 200 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 20 +cfg_ppo["experiment"]["checkpoint_interval"] = 200 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 4000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole.py b/docs/source/examples/omniisaacgym/ppo_cartpole.py index 248ffd0e..f091fdf9 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole.py @@ -17,38 +17,35 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), nn.Linear(32, 32), - nn.ELU(), - nn.Linear(32, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(32, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 32), - nn.ELU(), - nn.Linear(32, 32), - nn.ELU(), - nn.Linear(32, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Omniverse Isaac Gym environment @@ -66,12 +63,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py index 8cbb23cd..9838c6ca 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py @@ -19,38 +19,35 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), nn.Linear(32, 32), - nn.ELU(), - nn.Linear(32, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(32, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 32), - nn.ELU(), - nn.Linear(32, 32), - nn.ELU(), - nn.Linear(32, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the multi-threaded Omniverse Isaac Gym environment @@ -68,12 +65,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. 
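A small bookkeeping note on the mini_batches values used throughout these scripts: as inline comments such as "4 # 16 * 8192 / 32768" indicate, the number of mini-batches is chosen so that rollouts × parallel environments divides evenly by the desired mini-batch size. A throwaway helper (the function name is made up for illustration) makes the arithmetic explicit:

def mini_batches_for(rollouts: int, num_envs: int, mini_batch_size: int) -> int:
    """Number of PPO mini-batches per epoch for a given rollout / environment / batch-size combination."""
    transitions = rollouts * num_envs           # samples collected before each update
    assert transitions % mini_batch_size == 0   # keep every mini-batch the same size
    return transitions // mini_batch_size

print(mini_batches_for(16, 8192, 32768))  # 4, matching the Ant and AllegroHand configs
print(mini_batches_for(24, 4096, 32768))  # 3, matching the Anymal config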
diff --git a/docs/source/examples/omniisaacgym/ppo_crazy_flie.py b/docs/source/examples/omniisaacgym/ppo_crazy_flie.py new file mode 100644 index 00000000..b8c16fd6 --- /dev/null +++ b/docs/source/examples/omniisaacgym/ppo_crazy_flie.py @@ -0,0 +1,115 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env +from skrl.envs.torch import load_omniverse_isaacgym_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the shared model (stochastic and deterministic models) for the agent using mixins. +class Shared(GaussianMixin, DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.Tanh(), + nn.Linear(256, 256), + nn.Tanh(), + nn.Linear(256, 128), + nn.Tanh()) + + self.mean_layer = nn.Linear(128, self.num_actions) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) + + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) + + def compute(self, states, taken_actions, role): + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) + + +# Load and wrap the Omniverse Isaac Gym environment +env = load_omniverse_isaacgym_env(task_name="Crazyflie") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 16 # memory_size +cfg_ppo["learning_epochs"] = 8 +cfg_ppo["mini_batches"] = 4 # 16 * 4096 / 16384 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 1e-4 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.016} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 1.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 80 and 800 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 80 +cfg_ppo["experiment"]["checkpoint_interval"] = 800 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 16000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() diff --git a/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py b/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py new file mode 100644 index 00000000..310c2e2e --- /dev/null +++ b/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py @@ -0,0 +1,115 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env +from skrl.envs.torch import load_omniverse_isaacgym_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU()) + + self.mean_layer = nn.Linear(64, self.num_actions) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(64, 1) + + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) + + def compute(self, states, taken_actions, role): + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) + + +# Load and wrap the Omniverse Isaac Gym environment +env = load_omniverse_isaacgym_env(task_name="FrankaCabinet") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model + + +# Configure and instantiate the agent. 
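# A rough sketch of what the agent wires up from the two learning-rate scheduler entries in the
# configuration below (illustrative only, with made-up variable names; PPO creates its own Adam
# optimizer and steps the scheduler with the mean KL divergence measured during each update,
# adapting the learning rate depending on whether the KL overshoots or undershoots "kl_threshold"):
sketch_optimizer = torch.optim.Adam(models_ppo["policy"].parameters(), lr=5e-4)
sketch_scheduler = KLAdaptiveRL(sketch_optimizer, kl_threshold=0.008)
# sketch_scheduler.step(measured_kl)  # "measured_kl" is hypothetical: the KL value comes from the PPO update loop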
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 16 # memory_size +cfg_ppo["learning_epochs"] = 8 +cfg_ppo["mini_batches"] = 8 # 16 * 4096 / 8192 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 5e-4 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 2.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 120 and 1200 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 120 +cfg_ppo["experiment"]["checkpoint_interval"] = 1200 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 24000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() diff --git a/docs/source/examples/omniisaacgym/ppo_humanoid.py b/docs/source/examples/omniisaacgym/ppo_humanoid.py index 29d57b6d..cf45a8a1 100644 --- a/docs/source/examples/omniisaacgym/ppo_humanoid.py +++ b/docs/source/examples/omniisaacgym/ppo_humanoid.py @@ -17,42 +17,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 400), nn.ELU(), nn.Linear(400, 200), nn.ELU(), nn.Linear(200, 100), - nn.ELU(), - nn.Linear(100, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(100, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(100, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 400), - nn.ELU(), - nn.Linear(400, 200), - nn.ELU(), - nn.Linear(200, 100), - nn.ELU(), - nn.Linear(100, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Omniverse Isaac Gym environment @@ -70,12 +65,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. 
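Most of these configs also pass a rewards_shaper that simply scales the raw rewards, for example lambda rewards, timestep, timesteps: rewards * 0.01. Because the callable also receives the current and total timesteps, shaping can depend on training progress. A hedged sketch of a custom shaper with the same signature (the annealed scale below is purely illustrative and not taken from any of these tasks):

def annealed_rewards_shaper(rewards, timestep, timesteps):
    # start from the usual 0.01 scaling and ease towards 0.1 as training progresses (illustrative values)
    scale = 0.01 + 0.09 * (timestep / timesteps)
    return rewards * scale

# drop-in replacement for the lambda used in these examples
cfg_ppo["rewards_shaper"] = annealed_rewards_shaper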
diff --git a/docs/source/examples/omniisaacgym/ppo_ingenuity.py b/docs/source/examples/omniisaacgym/ppo_ingenuity.py new file mode 100644 index 00000000..345ec93c --- /dev/null +++ b/docs/source/examples/omniisaacgym/ppo_ingenuity.py @@ -0,0 +1,115 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env +from skrl.envs.torch import load_omniverse_isaacgym_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the shared model (stochastic and deterministic models) for the agent using mixins. +class Shared(GaussianMixin, DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU()) + + self.mean_layer = nn.Linear(128, self.num_actions) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) + + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) + + def compute(self, states, taken_actions, role): + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) + + +# Load and wrap the Omniverse Isaac Gym environment +env = load_omniverse_isaacgym_env(task_name="Ingenuity") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 16 # memory_size +cfg_ppo["learning_epochs"] = 8 +cfg_ppo["mini_batches"] = 4 # 16 * 4096 / 16384 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 1e-3 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.016} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 1.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 32 and 320 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 32 +cfg_ppo["experiment"]["checkpoint_interval"] = 320 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 6400, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() diff --git a/docs/source/examples/omniisaacgym/ppo_quadcopter.py b/docs/source/examples/omniisaacgym/ppo_quadcopter.py new file mode 100644 index 00000000..2425c4f2 --- /dev/null +++ b/docs/source/examples/omniisaacgym/ppo_quadcopter.py @@ -0,0 +1,115 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env +from skrl.envs.torch import load_omniverse_isaacgym_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU()) + + self.mean_layer = nn.Linear(128, self.num_actions) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) + + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) + + def compute(self, states, taken_actions, role): + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) + + +# Load and wrap the Omniverse Isaac Gym environment +env = load_omniverse_isaacgym_env(task_name="Quadcopter") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 16 # memory_size +cfg_ppo["learning_epochs"] = 8 +cfg_ppo["mini_batches"] = 4 # 16 * 4096 / 16384 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 1e-3 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.016} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 1.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.1 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 80 and 800 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 80 +cfg_ppo["experiment"]["checkpoint_interval"] = 800 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 16000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() diff --git a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py index 1d1e9e11..4908a92c 100644 --- a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py @@ -17,14 +17,13 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -33,30 +32,24 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(512, 256), nn.ELU(), nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 512), - nn.ELU(), - nn.Linear(512, 512), - nn.ELU(), - nn.Linear(512, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Omniverse Isaac Gym environment @@ -67,28 +60,24 @@ def compute(self, states, taken_actions, role): # Instantiate a RandomMemory as rollout buffer (any memory can be used for this) -memory = RandomMemory(memory_size=8, num_envs=env.num_envs, device=device) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) # Instantiate the agent's models (function approximators). # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. 
# Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters cfg_ppo = PPO_DEFAULT_CONFIG.copy() -cfg_ppo["rollouts"] = 8 # memory_size +cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 5 -cfg_ppo["mini_batches"] = 4 # 8 * 16384 / 32768 +cfg_ppo["mini_batches"] = 4 # 16 * 8192 / 32768 cfg_ppo["discount_factor"] = 0.99 cfg_ppo["lambda"] = 0.95 cfg_ppo["learning_rate"] = 5e-4 @@ -108,9 +97,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} cfg_ppo["value_preprocessor"] = RunningStandardScaler cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} -# logging to TensorBoard and write checkpoints each 200 and 2000 timesteps respectively -cfg_ppo["experiment"]["write_interval"] = 200 -cfg_ppo["experiment"]["checkpoint_interval"] = 2000 +# logging to TensorBoard and write checkpoints each 800 and 8000 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 800 +cfg_ppo["experiment"]["checkpoint_interval"] = 8000 agent = PPO(models=models_ppo, memory=memory, @@ -121,7 +110,7 @@ def compute(self, states, taken_actions, role): # Configure and instantiate the RL trainer -cfg_trainer = {"timesteps": 40000, "headless": True} +cfg_trainer = {"timesteps": 160000, "headless": True} trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) # start training From 00a317b33344c90fdb75354b56f4076ae05ba162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 30 Sep 2022 10:23:30 +0200 Subject: [PATCH 092/108] Update Isaac Gym examples --- .../examples/isaacgym/ppo_allegro_hand.py | 55 ++++++++---------- docs/source/examples/isaacgym/ppo_ant.py | 51 +++++++---------- docs/source/examples/isaacgym/ppo_anymal.py | 51 +++++++---------- .../examples/isaacgym/ppo_anymal_terrain.py | 8 +-- .../examples/isaacgym/ppo_ball_balance.py | 50 +++++++--------- docs/source/examples/isaacgym/ppo_cartpole.py | 49 +++++++--------- .../examples/isaacgym/ppo_cartpole_eval.py | 31 +++++++--- .../examples/isaacgym/ppo_franka_cabinet.py | 51 +++++++---------- docs/source/examples/isaacgym/ppo_humanoid.py | 51 +++++++---------- .../source/examples/isaacgym/ppo_ingenuity.py | 57 ++++++++----------- .../examples/isaacgym/ppo_quadcopter.py | 51 +++++++---------- .../examples/isaacgym/ppo_shadow_hand.py | 53 +++++++---------- .../source/examples/isaacgym/ppo_trifinger.py | 57 +++++++------------ 13 files changed, 261 insertions(+), 354 deletions(-) diff --git a/docs/source/examples/isaacgym/ppo_allegro_hand.py b/docs/source/examples/isaacgym/ppo_allegro_hand.py index 927b8f5d..2214df9b 100644 --- a/docs/source/examples/isaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/isaacgym/ppo_allegro_hand.py @@ -16,49 +16,44 @@ # set the seed for reproducibility -set_seed(42) +seed = set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), nn.Linear(512, 256), nn.ELU(), nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 512), - nn.ELU(), - nn.Linear(512, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment using the easy-to-use API from NVIDIA -env = isaacgymenvs.make(seed=42, +env = isaacgymenvs.make(seed=seed, task="AllegroHand", num_envs=16384, sim_device="cuda:0", @@ -78,12 +73,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/isaacgym/ppo_ant.py b/docs/source/examples/isaacgym/ppo_ant.py index ad2ce0bf..4bbc11f1 100644 --- a/docs/source/examples/isaacgym/ppo_ant.py +++ b/docs/source/examples/isaacgym/ppo_ant.py @@ -19,42 +19,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), nn.Linear(256, 128), nn.ELU(), nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(64, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -72,12 +67,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/isaacgym/ppo_anymal.py b/docs/source/examples/isaacgym/ppo_anymal.py index 656cc3fe..1de573e8 100644 --- a/docs/source/examples/isaacgym/ppo_anymal.py +++ b/docs/source/examples/isaacgym/ppo_anymal.py @@ -19,42 +19,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), nn.Linear(256, 128), nn.ELU(), nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(64, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -72,12 +67,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. 
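Most of the example diffs in this patch series apply the same refactoring: the separate Policy and Value classes are replaced by a single Shared model that runs a Gaussian policy head and a deterministic value head on a common trunk, and both entries of the models_ppo dictionary point to the same instance. Condensed outside the diff context, the pattern looks roughly like the sketch below. It is illustrative only: the layer sizes and the placeholder gym spaces are arbitrary stand-ins, and the skrl calls follow the same API already used in the diffs above.

import gym
import torch
import torch.nn as nn

from skrl.models.torch import Model, GaussianMixin, DeterministicMixin


class Shared(GaussianMixin, DeterministicMixin, Model):
    def __init__(self, observation_space, action_space, device, clip_actions=False,
                 clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"):
        Model.__init__(self, observation_space, action_space, device)
        GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction)
        DeterministicMixin.__init__(self, clip_actions)

        # common trunk shared by both roles
        self.net = nn.Sequential(nn.Linear(self.num_observations, 256),
                                 nn.ELU(),
                                 nn.Linear(256, 128),
                                 nn.ELU())

        # role-specific heads: action mean / log-std for the policy, a scalar output for the value
        self.mean_layer = nn.Linear(128, self.num_actions)
        self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions))
        self.value_layer = nn.Linear(128, 1)

    def act(self, states, taken_actions, role):
        # dispatch to the mixin that matches the requested role
        if role == "policy":
            return GaussianMixin.act(self, states, taken_actions, role)
        elif role == "value":
            return DeterministicMixin.act(self, states, taken_actions, role)

    def compute(self, states, taken_actions, role):
        if role == "policy":
            return self.mean_layer(self.net(states)), self.log_std_parameter
        elif role == "value":
            return self.value_layer(self.net(states))


# placeholder spaces stand in for env.observation_space / env.action_space of a wrapped environment
observation_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(60,))
action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(12,))

shared = Shared(observation_space, action_space, device="cpu")
models_ppo = {"policy": shared,
              "value": shared}  # same instance: parameters are shared between roles

Because both roles run through the same trunk, only the output heads (mean_layer, log_std_parameter and value_layer) are role-specific, which is why the two models_ppo entries can safely point to one instance instead of two separately initialized networks.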
diff --git a/docs/source/examples/isaacgym/ppo_anymal_terrain.py b/docs/source/examples/isaacgym/ppo_anymal_terrain.py index d64118f2..f83e0c9f 100644 --- a/docs/source/examples/isaacgym/ppo_anymal_terrain.py +++ b/docs/source/examples/isaacgym/ppo_anymal_terrain.py @@ -24,9 +24,9 @@ # - Value: takes the state as input and provides a value to guide the policy class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -75,10 +75,6 @@ def compute(self, states, taken_actions, role): models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) models_ppo["value"] = Value(env.observation_space, env.action_space, device) -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) - # Configure and instantiate the agent. # Only modify some of the default configuration, visit its documentation to see all the options diff --git a/docs/source/examples/isaacgym/ppo_ball_balance.py b/docs/source/examples/isaacgym/ppo_ball_balance.py index bacde4d7..e3428f60 100644 --- a/docs/source/examples/isaacgym/ppo_ball_balance.py +++ b/docs/source/examples/isaacgym/ppo_ball_balance.py @@ -19,42 +19,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 128), nn.ELU(), nn.Linear(128, 64), nn.ELU(), nn.Linear(64, 32), - nn.ELU(), - nn.Linear(32, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(32, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 128), - nn.ELU(), - nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, 32), - nn.ELU(), - nn.Linear(32, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -72,12 +67,9 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. # Only modify some of the default configuration, visit its documentation to see all the options diff --git a/docs/source/examples/isaacgym/ppo_cartpole.py b/docs/source/examples/isaacgym/ppo_cartpole.py index 9525453c..e3f8c253 100644 --- a/docs/source/examples/isaacgym/ppo_cartpole.py +++ b/docs/source/examples/isaacgym/ppo_cartpole.py @@ -19,38 +19,35 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), nn.Linear(32, 32), - nn.ELU(), - nn.Linear(32, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(32, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 32), - nn.ELU(), - nn.Linear(32, 32), - nn.ELU(), - nn.Linear(32, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -68,12 +65,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/isaacgym/ppo_cartpole_eval.py b/docs/source/examples/isaacgym/ppo_cartpole_eval.py index 3ed1ff63..dde950f6 100644 --- a/docs/source/examples/isaacgym/ppo_cartpole_eval.py +++ b/docs/source/examples/isaacgym/ppo_cartpole_eval.py @@ -4,7 +4,7 @@ import torch.nn as nn # Import the skrl components to build the RL system -from skrl.models.torch import Model, GaussianMixin +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG from skrl.resources.preprocessors.torch import RunningStandardScaler from skrl.trainers.torch import SequentialTrainer @@ -12,22 +12,35 @@ from skrl.envs.torch import load_isaacgym_env_preview4 -# Define only the policy for evaluation -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), nn.Linear(32, 32), - nn.ELU(), - nn.Linear(32, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(32, 1) + + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -41,7 +54,7 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) # Configure and instantiate the agent. diff --git a/docs/source/examples/isaacgym/ppo_franka_cabinet.py b/docs/source/examples/isaacgym/ppo_franka_cabinet.py index 690ad036..3fd9e834 100644 --- a/docs/source/examples/isaacgym/ppo_franka_cabinet.py +++ b/docs/source/examples/isaacgym/ppo_franka_cabinet.py @@ -19,42 +19,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), nn.Linear(256, 128), nn.ELU(), nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(64, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 64), - nn.ELU(), - nn.Linear(64, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -72,12 +67,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/isaacgym/ppo_humanoid.py b/docs/source/examples/isaacgym/ppo_humanoid.py index 1a1272f9..19893771 100644 --- a/docs/source/examples/isaacgym/ppo_humanoid.py +++ b/docs/source/examples/isaacgym/ppo_humanoid.py @@ -19,42 +19,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 400), nn.ELU(), nn.Linear(400, 200), nn.ELU(), nn.Linear(200, 100), - nn.ELU(), - nn.Linear(100, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(100, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(100, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 400), - nn.ELU(), - nn.Linear(400, 200), - nn.ELU(), - nn.Linear(200, 100), - nn.ELU(), - nn.Linear(100, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -72,12 +67,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/isaacgym/ppo_ingenuity.py b/docs/source/examples/isaacgym/ppo_ingenuity.py index 84c7570b..f60d46aa 100644 --- a/docs/source/examples/isaacgym/ppo_ingenuity.py +++ b/docs/source/examples/isaacgym/ppo_ingenuity.py @@ -16,55 +16,50 @@ # set the seed for reproducibility -set_seed(42) +seed = set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. -# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. 
+class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), nn.Linear(256, 256), nn.ELU(), nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 256), - nn.ELU(), - nn.Linear(256, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment using the easy-to-use API from NVIDIA -env = isaacgymenvs.make(seed=42, +env = isaacgymenvs.make(seed=seed, task="Ingenuity", num_envs=4096, sim_device="cuda:0", rl_device="cuda:0", graphics_device_id=0, - headless=False) + headless=True) env = wrap_env(env) device = env.device @@ -78,12 +73,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/isaacgym/ppo_quadcopter.py b/docs/source/examples/isaacgym/ppo_quadcopter.py index 06289885..34ed47ed 100644 --- a/docs/source/examples/isaacgym/ppo_quadcopter.py +++ b/docs/source/examples/isaacgym/ppo_quadcopter.py @@ -19,42 +19,37 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. 
-# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. +class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), nn.Linear(256, 256), nn.ELU(), nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 256), - nn.ELU(), - nn.Linear(256, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -72,12 +67,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/isaacgym/ppo_shadow_hand.py b/docs/source/examples/isaacgym/ppo_shadow_hand.py index 2f184912..c7e8636a 100644 --- a/docs/source/examples/isaacgym/ppo_shadow_hand.py +++ b/docs/source/examples/isaacgym/ppo_shadow_hand.py @@ -19,14 +19,13 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. 
-# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. +class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 512), nn.ELU(), @@ -35,30 +34,24 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(512, 256), nn.ELU(), nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 512), - nn.ELU(), - nn.Linear(512, 512), - nn.ELU(), - nn.Linear(512, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -76,12 +69,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. diff --git a/docs/source/examples/isaacgym/ppo_trifinger.py b/docs/source/examples/isaacgym/ppo_trifinger.py index 898d5bb6..87591dcf 100644 --- a/docs/source/examples/isaacgym/ppo_trifinger.py +++ b/docs/source/examples/isaacgym/ppo_trifinger.py @@ -19,14 +19,13 @@ set_seed(42) -# Define the models (stochastic and deterministic models) for the agent using mixins. 
-# - Policy: takes as input the environment's observation/state and returns an action -# - Value: takes the state as input and provides a value to guide the policy -class Policy(GaussianMixin, Model): +# Define the shared model (stochastic and deterministic models) for the agent using mixins. +class Shared(GaussianMixin, DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, - clip_log_std=True, min_log_std=-20, max_log_std=2): + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) - GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + DeterministicMixin.__init__(self, clip_actions) self.net = nn.Sequential(nn.Linear(self.num_observations, 256), nn.ELU(), @@ -35,30 +34,24 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(256, 128), nn.ELU(), nn.Linear(128, 128), - nn.ELU(), - nn.Linear(128, self.num_actions)) + nn.ELU()) + + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + self.value_layer = nn.Linear(128, 1) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter - -class Value(DeterministicMixin, Model): - def __init__(self, observation_space, action_space, device, clip_actions=False): - Model.__init__(self, observation_space, action_space, device) - DeterministicMixin.__init__(self, clip_actions) - - self.net = nn.Sequential(nn.Linear(self.num_observations, 256), - nn.ELU(), - nn.Linear(256, 256), - nn.ELU(), - nn.Linear(256, 128), - nn.ELU(), - nn.Linear(128, 128), - nn.ELU(), - nn.Linear(128, 1)) + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) def compute(self, states, taken_actions, role): - return self.net(states) + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) # Load and wrap the Isaac Gym environment @@ -76,12 +69,8 @@ def compute(self, states, taken_actions, role): # PPO requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} -models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) -models_ppo["value"] = Value(env.observation_space, env.action_space, device) - -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) +models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. 
@@ -94,8 +83,6 @@ def compute(self, states, taken_actions, role): cfg_ppo["discount_factor"] = 0.99 cfg_ppo["lambda"] = 0.95 cfg_ppo["learning_rate"] = 3e-4 -cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL -cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.016} cfg_ppo["random_timesteps"] = 0 cfg_ppo["learning_starts"] = 0 cfg_ppo["grad_norm_clip"] = 1.0 @@ -104,7 +91,7 @@ def compute(self, states, taken_actions, role): cfg_ppo["clip_predicted_values"] = True cfg_ppo["entropy_loss_scale"] = 0.0 cfg_ppo["value_loss_scale"] = 2.0 -cfg_ppo["kl_threshold"] = 0 +cfg_ppo["kl_threshold"] = 0.016 cfg_ppo["rewards_shaper"] = lambda rewards, timestep, timesteps: rewards * 0.01 cfg_ppo["state_preprocessor"] = RunningStandardScaler cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} From 23dc7ceeda2a8f947d86d9706cc574a0428eb3f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 1 Oct 2022 09:59:34 +0200 Subject: [PATCH 093/108] Update the getting started section --- docs/source/intro/getting_started.rst | 531 +++++++++++++++++++++++++- 1 file changed, 510 insertions(+), 21 deletions(-) diff --git a/docs/source/intro/getting_started.rst b/docs/source/intro/getting_started.rst index a6259c1c..112a80ef 100644 --- a/docs/source/intro/getting_started.rst +++ b/docs/source/intro/getting_started.rst @@ -4,60 +4,549 @@ Getting Started **Reinforcement Learning (RL)** is a Machine Learning sub-field for decision making that allows an agent to learn from its interaction with the environment as shown in the following schema: .. image:: ../_static/imgs/rl_schema.svg - :width: 100% - :align: center - :alt: Reinforcement Learning schema + :width: 100% + :align: center + :alt: Reinforcement Learning schema .. raw:: html -
+
-At each step (also called timestep) of interaction with the environment, the agent sees an observation :math:`o_t` of the complete description of the state :math:`s_t \in S` of the environment. Then, it decides which action :math:`a_t \in A` to take from the action space using a policy. The environment, which changes in response to the agent's action (or by itself), returns a reward signal :math:`r_t = R(s_t, a_t, s_{t+1})` as a measure of how good or bad the action was that moved it to its new state :math:`s_{t+1}`. The agent aims to maximize the cumulative reward (discounted or not by a factor :math:`\gamma \in (0,1]`) by adjusting the policy's behaviour via some optimization algorithm +At each step (also called timestep) of interaction with the environment, the agent sees an observation :math:`o_t` of the complete description of the state :math:`s_t \in S` of the environment. Then, it decides which action :math:`a_t \in A` to take from the action space using a policy. The environment, which changes in response to the agent's action (or by itself), returns a reward signal :math:`r_t = R(s_t, a_t, s_{t+1})` as a measure of how good or bad the action was that moved it to its new state :math:`s_{t+1}`. The agent aims to maximize the cumulative reward (discounted or not by a factor :math:`\gamma \in (0,1]`) by adjusting the policy's behaviour via some optimization algorithm. -**Based on this schema, this section intends to guide, step by step, in the creation of an RL system** +**From this schema, this section is intended to guide in the creation of a RL system using skrl**. Visit the :ref:`Examples ` section for training and evaluation demonstrations with different environment interfaces and highlighted practices, among others. 1. Environments --------------- -The environment plays a fundamental role in the definition of the RL schema. For example, the selection of the agent depends strongly on the observation and action space nature. There are several interfaces to interact with the environments such as OpenAI Gym or DeepMind. However, each of them has a different API and work with non-compatible data types +The environment plays a fundamental role in the definition of the RL schema. For example, the selection of the agent depends strongly on the observation and action space nature. There are several interfaces to interact with the environments such as OpenAI Gym or DeepMind. However, each of them has a different API and work with non-compatible data types. -skrl offers a function to **wrap environments** based on the OpenAI Gym, DeepMind, Isaac Gym and Omniverse Isaac Gym interfaces (the last two have slight differences with OpenAI Gym) and offer, for library components, a common interface (based on OpenAI Gym) as shown in the following figure. Refer to the :doc:`Wrapping <../modules/skrl.envs.wrapping>` section for more information +skrl offers a function to **wrap environments** based on the OpenAI Gym, DeepMind, Isaac Gym and Omniverse Isaac Gym interfaces (the last two have slight differences with OpenAI Gym) and offer, for library components, a common interface (based on OpenAI Gym) as shown in the following figure. Refer to the :doc:`Wrapping <../modules/skrl.envs.wrapping>` section for more information. .. image:: ../_static/imgs/wrapping.svg - :width: 100% - :align: center - :alt: Environment wrapping + :width: 100% + :align: center + :alt: Environment wrapping -.. 
raw:: html +Within the methods and properties defined in the wrapped environment, the observation and action space are one of the most relevant for instantiating other library components. The following code snippets show how to load and wrap environments based on the supported interfaces: + +.. tabs:: + + .. tab:: Omniverse Isaac Gym + + .. tabs:: + + .. tab:: Common environment + + .. code-block:: python + + # import the environment wrapper and loader + from skrl.envs.torch import wrap_env + from skrl.envs.torch import load_omniverse_isaacgym_env + + # load the environment + env = load_omniverse_isaacgym_env(task_name="Cartpole") + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="omniverse-isaacgym")' + + .. tab:: Multi-threaded environment + + .. code-block:: python + + # import the environment wrapper and loader + from skrl.envs.torch import wrap_env + from skrl.envs.torch import load_omniverse_isaacgym_env + + # load the multi-threaded environment + env = load_omniverse_isaacgym_env(task_name="Cartpole", multi_threaded=True, timeout=30) + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="omniverse-isaacgym")' + + .. tab:: Isaac Gym + + .. tabs:: + + .. tab:: Preview 4 (isaacgymenvs.make) + + .. code-block:: python + + import isaacgymenvs + + # import the environment wrapper + from skrl.envs.torch import wrap_env + + # create/load the environment using the easy-to-use API from NVIDIA + env = isaacgymenvs.make(seed=0, + task="Cartpole", + num_envs=512, + sim_device="cuda:0", + rl_device="cuda:0", + graphics_device_id=0, + headless=False) + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview4")' + + .. tab:: Preview 4 + + .. code-block:: python + + # import the environment wrapper and loader + from skrl.envs.torch import wrap_env + from skrl.envs.torch import load_isaacgym_env_preview4 + + # load the environment + env = load_isaacgym_env_preview4(task_name="Cartpole") + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview4")' + + .. tab:: Preview 3 + + .. code-block:: python + + # import the environment wrapper and loader + from skrl.envs.torch import wrap_env + from skrl.envs.torch import load_isaacgym_env_preview3 + + # load the environment + env = load_isaacgym_env_preview3(task_name="Cartpole") + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview3")' + + .. tab:: Preview 2 + + .. code-block:: python + + # import the environment wrapper and loader + from skrl.envs.torch import wrap_env + from skrl.envs.torch import load_isaacgym_env_preview2 + + # load the environment + env = load_isaacgym_env_preview2(task_name="Cartpole") + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview2")' + + .. tab:: OpenAI Gym + + .. tabs:: + + .. tab:: Single environment + + .. code-block:: python + + # import the environment wrapper and gym + from skrl.envs.torch import wrap_env + import gym + + # load environment + env = gym.make('Pendulum-v1') + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' + + .. tab:: Vectorized environment -
+ Visit the OpenAI Gym documentation (`Vector API `_) for more information about the creation and usage of vectorized environments. + + .. code-block:: python + + # import the environment wrapper and gym + from skrl.envs.torch import wrap_env + import gym + + # load a vectorized environment + env = gym.vector.make("Pendulum-v1", num_envs=10, asynchronous=False) + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' + + .. tab:: DeepMind + + .. code-block:: python + + # import the environment wrapper and the deepmind suite + from skrl.envs.torch import wrap_env + from dm_control import suite + + # load environment + env = suite.load(domain_name="cartpole", task_name="swingup") + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="dm")' + +Once the environment is known (and instantiated), it is time to configure and instantiate the agent. Agents are composed, apart from the optimization algorithm, by several components, such as memories, models or noises, for example, according to their nature. The following subsections focus on those components. 2. Memories ----------- -:red:`Under construction...` +Memories are storage components that allow agents to collect and use/reuse recent or past experiences or other types of information. These can be large in size (such as replay buffers used by off-policy algorithms like DDPG, TD3 or SAC) or small in size (such as rollout buffers used by on-policy algorithms like PPO or TRPO to store batches that are discarded after use). + +skrl provides **generic memory definitions** that are not tied to the agent implementation and can be used for any role, such as rollout or replay buffers. They are empty shells when they are instantiated and the agents are in charge of defining the tensors according to their needs. The total space occupied is the product of the memory size (:literal:`memory_size`), the number of environments (:literal:`num_envs`) obtained from the wrapped environment and the data size for each defined tensor. + +The following code snippets show how to instantiate a memory: + +.. tabs:: + + .. tab:: Random memory + + .. code-block:: python + + from skrl.memories.torch import RandomMemory + + # instantiate a memory + memory = RandomMemory(memory_size=100000, num_envs=env.num_envs) + +Memories are passed directly to the agent constructor, if required (not all agents require memory, such as Q-learning or SARSA, for example), during its instantiation under the argument :literal:`memory`. 3. Models --------- -:red:`Under construction...` +Models are the agents' brains. Agents can have one or several models and their parameters are adjusted via the optimization algorithms. + +In contrast to other libraries, skrl does not provide predefined models or fixed templates (this practice tends to hide and reduce the flexibility of the system, forcing developers to deeply inspect the code to make even small changes). Nevertheless, **helper classes/mixins are provided** to create discrete and continuous (stochastic or deterministic) models with the library. In this way, the user/researcher should only be concerned with the definition of the approximation functions (tables or artificial neural networks), having all the control in his hands. + +The following code snippets show how to define a model, based on the concept of each respective image, using the provided classes/mixins. 
For more information refer to :ref:`Categorical `, :ref:`Gaussian `, :ref:`Multivariate Gaussian ` and :ref:`Deterministic ` sections for artificial neural networks models, and :ref:`Tabular ` section for tabular models. + +.. tabs:: + + .. tab:: Categorical + + .. image:: ../_static/imgs/model_categorical.svg + :width: 100% + :align: center + :alt: Categorical model + + .. raw:: html + +
+ + .. code-block:: python + + import torch + import torch.nn as nn + from skrl.models.torch import Model, CategoricalMixin + + # define the model + class Policy(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", unnormalized_log_prob=True): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states) + + .. tab:: Gaussian + + .. image:: ../_static/imgs/model_gaussian.svg + :width: 100% + :align: center + :alt: Gaussian model + + .. raw:: html + +
+ + .. code-block:: python + + import torch + import torch.nn as nn + from skrl.models.torch import Model, GaussianMixin + + # define the model + class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + + .. tab:: Multivariate Gaussian + + .. image:: ../_static/imgs/model_multivariate_gaussian.svg + :width: 100% + :align: center + :alt: Multivariate Gaussian model + + .. raw:: html + +
+ + .. code-block:: python + + import torch + import torch.nn as nn + from skrl.models.torch import Model, MultivariateGaussianMixin + + # define the model + class Policy(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + + .. tab:: Deterministic + + .. image:: ../_static/imgs/model_deterministic.svg + :width: 60% + :align: center + :alt: Deterministic model + + .. raw:: html + +
+ + .. code-block:: python + + import torch + import torch.nn as nn + from skrl.models.torch import Model, DeterministicMixin + + # define the model + class Policy(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU(), + nn.Linear(32, self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states) + + .. tab:: Tabular + + .. code-block:: python + + import torch + from skrl.models.torch import Model, TabularMixin + + # define the model + class Policy(TabularMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", num_envs=1): + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) + + self.table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) + + def compute(self, states, taken_actions, role): + actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), states], + dim=-1, keepdim=True).view(-1,1) + return actions + +Models must be collected in a dictionary and passed to the agent constructor during its instantiation under the argument :literal:`models`. The dictionary keys are specific to each agent. Visit their respective documentation for more details (under *Spaces and models* section). For example, the PPO agent requires the policy and value models as shown below: + +.. code-block:: python + + models = {} + models["policy"] = Policy(env.observation_space, env.action_space, env.device) + models["value"] = Value(env.observation_space, env.action_space, env.device) + +Models can be saved and loaded to and from the file system. However, the recommended practice for loading checkpoints to perform evaluations or continue an interrupted training is through the agents (they include, in addition to the models, other components and internal instances such as preprocessors or optimizers). Refer to :ref:`Saving, loading and logging ` (under *Checkpoints* section) for more information. 4. Noises --------- -:red:`Under construction...` +Noise plays a fundamental role in the exploration stage, especially in agents of a deterministic nature, such as DDPG or TD3. + +skrl provides, as part of its resources, **classes for instantiating noises** as shown in the following code snippets. Refer to :ref:`Noises ` documentation for more information. + +.. tabs:: + + .. tab:: Gaussian noise + + .. code-block:: python + + from skrl.resources.noises.torch import GaussianNoise + + # instantiate a noise + noise = GaussianNoise(mean=0, std=0.2, device=env.device) + + .. tab:: Ornstein-Uhlenbeck noise + + .. code-block:: python + + from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise + + # instantiate a noise + noise = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.2, base_scale=1.0, device=env.device) + +Noise instances are passed to the agents in their respective configuration dictionaries. For example, the DDPG agent requires the exploration noise as shown below: + +.. code-block:: python + + from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG + + agent_cfg = DDPG_DEFAULT_CONFIG.copy() + agent_cfg["exploration"]["noise"] = noise 5.
Learning rate schedulers --------------------------- -:red:`Under construction...` +Learning rate schedulers help the RL system converge faster and improve accuracy. + +skrl **supports all PyTorch learning rate schedulers** and provides, as part of its resources, **additional schedulers**. Refer to :ref:`Learning rate schedulers ` documentation for more information. + +Learning rate scheduler classes and their respective arguments (except the :literal:`optimizer` argument) are passed to the agents in their respective configuration dictionaries. For example, for the PPO agent, one of the schedulers can be configured as shown below: + +.. code-block:: python + + from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG + from skrl.resources.schedulers.torch import KLAdaptiveRL + + agent_cfg = PPO_DEFAULT_CONFIG.copy() + agent_cfg["learning_rate_scheduler"] = KLAdaptiveRL + agent_cfg["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} + +6. Preprocessors +---------------- -6. Agents +Data preprocessing can help increase the accuracy and efficiency of training by cleaning the data or making it suitable for machine learning models. + +skrl provides, as part of its resources, **preprocessor** classes. Refer to :ref:`Preprocessors ` documentation for more information. + +Preprocessor classes and their respective arguments are passed to the agents in their respective configuration dictionaries. For example, for the PPO agent, one of the preprocessors can be configured as shown below: + +.. code-block:: python + + from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG + from skrl.resources.preprocessors.torch import RunningStandardScaler + + agent_cfg = PPO_DEFAULT_CONFIG.copy() + agent_cfg["state_preprocessor"] = RunningStandardScaler + agent_cfg["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": env.device} + agent_cfg["value_preprocessor"] = RunningStandardScaler + agent_cfg["value_preprocessor_kwargs"] = {"size": 1, "device": env.device} + +7. Agents +--------- -:red:`Under construction...` +Agents are the components in charge of decision making. They are much more than models (neural networks, for example) and include the optimization algorithms that compute the optimal policy. + +skrl provides **state-of-the-art agents**. Their implementations are focused on readability, simplicity and code transparency. Each agent is implemented independently, even when two or more agents may contain code in common. Refer to each agent's documentation for more information about the models and spaces they support, their respective configurations, algorithm details and more.
+ + * :doc:`Advantage Actor Critic <../modules/skrl.agents.a2c>` (**A2C**) + * :doc:`Adversarial Motion Priors <../modules/skrl.agents.amp>` (**AMP**) + * :doc:`Cross-Entropy Method <../modules/skrl.agents.cem>` (**CEM**) + * :doc:`Deep Deterministic Policy Gradient <../modules/skrl.agents.ddpg>` (**DDPG**) + * :doc:`Double Deep Q-Network <../modules/skrl.agents.ddqn>` (**DDQN**) + * :doc:`Deep Q-Network <../modules/skrl.agents.dqn>` (**DQN**) + * :doc:`Proximal Policy Optimization <../modules/skrl.agents.ppo>` (**PPO**) + * :doc:`Q-learning <../modules/skrl.agents.q_learning>` (**Q-learning**) + * :doc:`Soft Actor-Critic <../modules/skrl.agents.sac>` (**SAC**) + * :doc:`State Action Reward State Action <../modules/skrl.agents.sarsa>` (**SARSA**) + * :doc:`Twin-Delayed DDPG <../modules/skrl.agents.td3>` (**TD3**) + * :doc:`Trust Region Policy Optimization <../modules/skrl.agents.trpo>` (**TRPO**) + +Agents generally expect, as arguments, the following components: models and memories, as well as the following variables: observation and action spaces, the device where their logic is executed, and a configuration dictionary with hyperparameters and other values. The remaining components, mentioned above, are collected through the configuration dictionary. For example, the PPO agent can be instantiated as follows: + +.. code-block:: python -7. Trainers + from skrl.agents.torch.ppo import PPO + + agent = PPO(models=models, # models dict + memory=memory, # memory instance, or None if not required + cfg=agent_cfg, # configuration dict (preprocessors, learning rate schedulers, etc.) + observation_space=env.observation_space, + action_space=env.action_space, + device=env.device) + +Agents can be saved and loaded to and from the file system. This is the **recommended practice** for loading checkpoints to perform evaluations or to continue interrupted training (since they include, in addition to models, other internal components and instances such as preprocessors or optimizers). Refer to :ref:`Saving, loading and logging ` (under *Checkpoints* section) for more information. + +8. Trainers ----------- -:red:`Under construction...` +Now that both actors, the environment and the agent, are instantiated, it is time to put the RL system in motion. + +skrl offers classes (called **trainers**) that manage the interaction cycle between the environment and the agent(s) for both training and evaluation. These classes also enable the simultaneous training and evaluation of several agents by scope (subsets of environments among all available environments), which may or may not share resources, in the same run. + +The following code snippets show how to instantiate and use the available trainers to train or evaluate the agent(s): + +.. tabs:: + + .. tab:: Sequential trainer + + .. code-block:: python + + from skrl.trainers.torch import SequentialTrainer + + # create a sequential trainer + cfg = {"timesteps": 50000, "headless": False} + trainer = SequentialTrainer(env=env, agents=[agent], cfg=cfg) + + # train the agent(s) + trainer.train() + + # evaluate the agent(s) + trainer.eval() + + .. tab:: Parallel trainer + + .. code-block:: python + + from skrl.trainers.torch import ParallelTrainer + + # create a parallel trainer + cfg = {"timesteps": 50000, "headless": False} + trainer = ParallelTrainer(env=env, agents=[agent], cfg=cfg) + + # train the agent(s) + trainer.train() + + # evaluate the agent(s) + trainer.eval() + + .. tab:: Manual trainer + + ..
code-block:: python + + from skrl.trainers.torch import ManualTrainer + + # create a manual trainer + cfg = {"timesteps": 50000, "headless": False} + trainer = ManualTrainer(env=env, agents=[agent], cfg=cfg) + + # train the agent(s) + trainer.train() + + # evaluate the agent(s) + trainer.eval() + +.. raw:: html + +
+ +**What's next?** + +Visit the :ref:`Examples ` section for training and evaluation demonstrations with different environment interfaces and highlighted practices, among others. From 66d4dd81fcc672ee00ad91b9b8af3c55effb1bf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 1 Oct 2022 12:16:24 +0200 Subject: [PATCH 094/108] Update data checkpoints section --- docs/source/intro/data.rst | 164 ++++++++++++++++++++++++++++--------- 1 file changed, 125 insertions(+), 39 deletions(-) diff --git a/docs/source/intro/data.rst b/docs/source/intro/data.rst index fa018b8f..eb09f06c 100644 --- a/docs/source/intro/data.rst +++ b/docs/source/intro/data.rst @@ -1,3 +1,5 @@ +.. _data: + Saving, loading and logging =========================== @@ -7,9 +9,9 @@ Tracking metrics (TensorBoard) Configuration ^^^^^^^^^^^^^ -`TensorBoard `_ is used for tracking and visualizing metrics and scalars (coefficients, losses, etc.). The tracking and writing of metrics and scalars is the responsibility of the agents (**can be customized independently for each agent using its configuration dictionary**) +`TensorBoard `_ is used for tracking and visualizing metrics and scalars (coefficients, losses, etc.). The tracking and writing of metrics and scalars is the responsibility of the agents (**can be customized independently for each agent using its configuration dictionary**). -Each agent offers the following parameters under the :literal:`"experiment"` key +Each agent offers the following parameters under the :literal:`"experiment"` key: .. code-block:: python :emphasize-lines: 5,6,7 @@ -27,16 +29,16 @@ Each agent offers the following parameters under the :literal:`"experiment"` key } } -* **directory**: directory path where the data generated by the experiments (a subdirectory) are stored. If no value is set, the :literal:`runs` folder (inside the current working directory) will be used (and created if it does not exist) +* **directory**: directory path where the data generated by the experiments (a subdirectory) are stored. If no value is set, the :literal:`runs` folder (inside the current working directory) will be used (and created if it does not exist). -* **experiment_name**: name of the experiment (subdirectory). If no value is set, it will be the current date and time and the agent's name (e.g. :literal:`22-01-09_22-48-49-816281_DDPG`) +* **experiment_name**: name of the experiment (subdirectory). If no value is set, it will be the current date and time and the agent's name (e.g. :literal:`22-01-09_22-48-49-816281_DDPG`). -* **write_interval**: interval for writing metrics and values to TensorBoard (default is 250 timesteps). A value equal to or less than 0 disables tracking and writing to TensorBoard +* **write_interval**: interval for writing metrics and values to TensorBoard (default is 250 timesteps). A value equal to or less than 0 disables tracking and writing to TensorBoard. Tracked metrics/scales visualization ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To visualize the tracked metrics/scales, during or after the training, TensorBoard can be launched using the following command in a terminal +To visualize the tracked metrics/scales, during or after the training, TensorBoard can be launched using the following command in a terminal: .. 
code-block:: bash @@ -104,7 +106,7 @@ Tracking custom metrics/scales * **Tracking custom data attached to the agent's control and timing logic (recommended)** - Although the TensorBoard's writing control and timing logic is controlled by the base class Agent, it is possible to track custom data. The :literal:`track_data` method can be used (see :doc:`Agent <../modules/skrl.agents.base_class>` class for more details), passing as arguments the data identification (tag) and the scalar value to be recorded + Although the TensorBoard's writing control and timing logic is controlled by the base class Agent, it is possible to track custom data. The :literal:`track_data` method can be used (see :doc:`Agent <../modules/skrl.agents.base_class>` class for more details), passing as arguments the data identification (tag) and the scalar value to be recorded. For example, to track the current CPU usage, the following code can be used: @@ -115,7 +117,7 @@ Tracking custom metrics/scales * **Tracking custom data directly to Tensorboard** - It is also feasible to access directly to the `SummaryWriter `_ instance through the :literal:`writer` property if it is desired to write directly to Tensorboard, avoiding the base class's control and timing logic + It is also feasible to access directly to the `SummaryWriter `_ instance through the :literal:`writer` property if it is desired to write directly to Tensorboard, avoiding the base class's control and timing logic. For example, to write directly to TensorBoard: @@ -132,9 +134,9 @@ Checkpoints Saving checkpoints ^^^^^^^^^^^^^^^^^^ -The checkpoints are saved in the :literal:`checkpoints` subdirectory of the experiment's directory (its path can be customized using the options described in the previous subsection). The checkpoint name is the key referring to the agent (or models, optimizers and preprocessors) and the current timestep (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/agent_2500.pt`) +The checkpoints are saved in the :literal:`checkpoints` subdirectory of the experiment's directory (its path can be customized using the options described in the previous subsection). The checkpoint name is the key referring to the agent (or models, optimizers and preprocessors) and the current timestep (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/agent_2500.pt`). -The checkpoint management, as in the previous case, is the responsibility of the agents (**can be customized independently for each agent using its configuration dictionary**) +The checkpoint management, as in the previous case, is the responsibility of the agents (**can be customized independently for each agent using its configuration dictionary**). .. code-block:: python :emphasize-lines: 9,10 @@ -152,57 +154,141 @@ The checkpoint management, as in the previous case, is the responsibility of the } } -* **checkpoint_interval**: interval for checkpoints (default is 1000 timesteps). A value equal to or less than 0 disables the checkpoint creation +* **checkpoint_interval**: interval for checkpoints (default is 1000 timesteps). A value equal to or less than 0 disables the checkpoint creation. -* **store_separately**: if set to :literal:`True`, all the modules that an agent contains (models, optimizers, preprocessors, etc.) will be saved each one in a separate file. 
By default (:literal:`False`) the modules are grouped in a dictionary and stored in the same file +* **store_separately**: if set to :literal:`True`, all the modules that an agent contains (models, optimizers, preprocessors, etc.) will each be saved in a separate file. By default (:literal:`False`) the modules are grouped in a dictionary and stored in the same file. **Checkpointing the best models** -The best models, attending the mean total reward, will be saved in the :literal:`checkpoints` subdirectory of the experiment's directory. The checkpoint name is the word :literal:`best` and the key referring to the model (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/best_agent.pt`) +The best models, according to the mean total reward, will be saved in the :literal:`checkpoints` subdirectory of the experiment's directory. The checkpoint name is the word :literal:`best` and the key referring to the model (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/best_agent.pt`). -The best models are updated internally on each TensorBoard writing interval :literal:`"write_interval"` and they are saved on each checkpoint interval :literal:`"checkpoint_interval"`. The :literal:`"store_separately"` key specifies whether the best modules are grouped and stored together or separately +The best models are updated internally on each TensorBoard writing interval :literal:`"write_interval"` and they are saved on each checkpoint interval :literal:`"checkpoint_interval"`. The :literal:`"store_separately"` key specifies whether the best modules are grouped and stored together or separately. Loading checkpoints ^^^^^^^^^^^^^^^^^^^ -Checkpoints can be loaded for each of the instantiated agents (or models) independently via the :literal:`.load(...)` method (`Agent.load <../modules/skrl.agents.base_class.html#skrl.agents.torch.base.Agent.load>`_ or `Model.load <../modules/skrl.models.base_class.html#skrl.models.torch.base.Model.load>`_). It accepts the path (relative or absolute) of the checkpoint to load as the only argument. The checkpoint will be dynamically mapped to the device specified as argument in the class constructor (internally the torch load's :literal:`map_location` method is used during loading) +Checkpoints can be loaded for each of the instantiated agents (or models) independently via the :literal:`.load(...)` method (`Agent.load <../modules/skrl.agents.base_class.html#skrl.agents.torch.base.Agent.load>`_ or `Model.load <../modules/skrl.models.base_class.html#skrl.models.torch.base.Model.load>`_). It accepts the path (relative or absolute) of the checkpoint to load as the only argument. The checkpoint will be dynamically mapped to the device specified as argument in the class constructor (internally, torch load's :literal:`map_location` argument is used during loading). .. note:: - The agents or models instances must have the same architecture/structure as the one used to save the checkpoint. The current implementation load the model's `state_dict `_ directly + The agent or model instances must have the same architecture/structure as the one used to save the checkpoint. The current implementation loads the model's `state_dict `_ directly. -The following code shows how to load the checkpoint (e.g. :literal:`runs/22-01-09_22-48-49-816281_DDPG/checkpoints/2500_policy.pt`) of an instantiated policy from a specific definition.
See the :ref:`Examples ` section for showcases about how to load control points and use them to continue the training or evaluate experiments +The following code snippets show how to load the checkpoints through the instantiated agent (recommended) or models. See the :ref:`Examples ` section for showcases about how to load control points and use them to continue the training or evaluate experiments. -.. code-block:: python - :emphasize-lines: 21 +.. tabs:: + + .. tab:: Agent (recommended) + + .. code-block:: python + :emphasize-lines: 12 + + from skrl.agents.torch.ppo import PPO + + # Instantiate the agent + agent = PPO(models=models, # models dict + memory=memory, # memory instance, or None if not required + cfg=agent_cfg, # configuration dict (preprocessors, learning rate schedulers, etc.) + observation_space=env.observation_space, + action_space=env.action_space, + device=env.device) + + # Load the checkpoint + agent.load("./runs/22-09-29_22-48-49-816281_DDPG/checkpoints/agent_1200.pt") + + .. tab:: Model - from skrl.models.torch import DeterministicModel + .. code-block:: python + :emphasize-lines: 22 - # Define the model - class Policy(DeterministicModel): - def __init__(self, observation_space, action_space, device, clip_actions = False): - super().__init__(observation_space, action_space, device, clip_actions) + from skrl.models.torch import Model, DeterministicMixin - self.net = nn.Sequential(nn.Linear(self.num_observations, 32), - nn.ReLU(), - nn.Linear(32, 32), - nn.ReLU(), - nn.Linear(32, self.num_actions)) + # Define the model + class Policy(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) - def compute(self, states, taken_actions): - return self.net(states) + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ReLU(), + nn.Linear(32, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions)) - # Instantiate the agent's model - policy = Policy(env.observation_space, env.action_space, device, clip_actions=True) + def compute(self, states, taken_actions, role): + return self.net(states) - # Load the checkpoint - policy.load("./runs/22-01-09_22-48-49-816281_DDPG/checkpoints/2500_policy.pt") + # Instantiate the model + policy = Policy(env.observation_space, env.action_space, env.device, clip_actions=True) + + # Load the checkpoint + policy.load("./runs/22-09-29_22-48-49-816281_DDPG/checkpoints/2500_policy.pt") Migrating external checkpoints ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ It is possible to load checkpoints generated with external reinforcement learning libraries into skrl agents (or models) via the :literal:`.migrate(...)` method (`Agent.migrate <../modules/skrl.agents.base_class.html#skrl.agents.torch.base.Agent.migrate>`_ or `Model.migrate <../modules/skrl.models.base_class.html#skrl.models.torch.base.Model.migrate>`_). +.. note:: + + In some cases it will be necessary to specify a parameter mapping, especially in ambiguous models (where 2 or more parameters, for source or current model, have equal shape). Refer to the respective method documentation for more details in these cases. + +The following code snippets show how to migrate checkpoints from other libraries to the agents or models implemented in skrl: + +.. tabs:: + + .. tab:: Agent + + .. 
code-block:: python + :emphasize-lines: 12 + + from skrl.agents.torch.ppo import PPO + + # Instantiate the agent + agent = PPO(models=models, # models dict + memory=memory, # memory instance, or None if not required + cfg=agent_cfg, # configuration dict (preprocessors, learning rate schedulers, etc.) + observation_space=env.observation_space, + action_space=env.action_space, + device=env.device) + + # Migrate a rl_games checkpoint + agent.migrate(path="./runs/Cartpole/nn/Cartpole.pth") + + .. tab:: Model + + .. code-block:: python + :emphasize-lines: 22, 25, 28-29 + + from skrl.models.torch import Model, DeterministicMixin + + # Define the model + class Policy(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ReLU(), + nn.Linear(32, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states) + + # Instantiate the model + policy = Policy(env.observation_space, env.action_space, env.device, clip_actions=True) + + # Migrate a rl_games checkpoint (only the model) + policy.migrate(path="./runs/Cartpole/nn/Cartpole.pth") + + # or migrate a stable-baselines3 checkpoint + policy.migrate(path="./ddpg_pendulum.zip") + + # or migrate a checkpoint of any other library + state_dict = torch.load("./external_model.pt") + policy.migrate(state_dict=state_dict) + -------------------- Memory export/import @@ -211,7 +297,7 @@ Memory export/import Exporting memories ^^^^^^^^^^^^^^^^^^ -Memories can be automatically exported to files at each filling cycle (before data overwriting is performed). Its activation, the output files' format and their path can be modified through the constructor parameters when an instance is created +Memories can be automatically exported to files at each filling cycle (before data overwriting is performed). Its activation, the output files' format and their path can be modified through the constructor parameters when an instance is created. .. code-block:: python :emphasize-lines: 7-9 @@ -226,11 +312,11 @@ Memories can be automatically exported to files at each filling cycle (before da export_format="pt", export_directory="./memories") -* **export**: enable or disable the memory export (default is disabled) +* **export**: enable or disable the memory export (default is disabled). -* **export_format**: the format of the exported memory (default is :literal:`"pt"`). Supported formats are PyTorch (:literal:`"pt"`), NumPy (:literal:`"np"`) and Comma-separated values (:literal:`"csv"`) +* **export_format**: the format of the exported memory (default is :literal:`"pt"`). Supported formats are PyTorch (:literal:`"pt"`), NumPy (:literal:`"np"`) and Comma-separated values (:literal:`"csv"`). -* **export_directory**: the directory where the memory will be exported (default is :literal:`"memory"`) +* **export_directory**: the directory where the memory will be exported (default is :literal:`"memory"`). 
Importing memories ^^^^^^^^^^^^^^^^^^ From 8d0f6633c7042812a8a8558ca17c137e18f0625d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 1 Oct 2022 14:26:00 +0200 Subject: [PATCH 095/108] Update Isaac Gym and Omniverse Isaac Gym examples in docs --- docs/source/intro/examples.rst | 114 +++++++++++++++++++++++++++------ 1 file changed, 94 insertions(+), 20 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 0b4c5b88..ffeb0791 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -300,6 +300,10 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. code-block:: bash + # memory + memory_size = horizon_length + + # agent rollouts = horizon_length learning_epochs = mini_epochs mini_batches = horizon_length * num_actors / minibatch_size @@ -319,6 +323,9 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 kl_threshold = 0 rewards_shaper = lambda rewards, timestep, timesteps: rewards * scale_value + # trainer + timesteps = horizon_length * max_epochs + .. note:: Isaac Gym environments implement a functionality to get their configuration from the command line. Because of this feature, setting the :literal:`headless` option from the trainer configuration will not work. In this case, it is necessary to invoke the scripts as follows: :literal:`python script.py headless=True` for Isaac Gym environments (preview 3 and preview 4) or :literal:`python script.py --headless` for Isaac Gym environments (preview 2) @@ -335,7 +342,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_allegro_hand.py :language: python - :emphasize-lines: 2, 61-67 + :emphasize-lines: 2, 19, 56-62 .. tab:: Ant @@ -343,7 +350,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_ant.py :language: python - :emphasize-lines: 13-14, 61-62 + :emphasize-lines: 13-14, 56-57 .. tab:: Anymal @@ -351,7 +358,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_anymal.py :language: python - :emphasize-lines: 13-14, 61-62 + :emphasize-lines: 13-14, 56-57 .. tab:: AnymalTerrain @@ -359,7 +366,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_anymal_terrain.py :language: python - :emphasize-lines: 11, 105-108 + :emphasize-lines: 11, 101-104 .. tab:: BallBalance @@ -367,7 +374,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_ball_balance.py :language: python - :emphasize-lines: 11, 104-107 + :emphasize-lines: 11, 96-99 .. tab:: Cartpole @@ -391,7 +398,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_franka_cabinet.py :language: python - :emphasize-lines: 10, 93-94 + :emphasize-lines: 10, 84-85 .. tab:: Humanoid @@ -399,7 +406,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_humanoid.py :language: python - :emphasize-lines: 10, 93-94 + :emphasize-lines: 10, 84-85 .. tab:: Humanoid (AMP) @@ -415,7 +422,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. 
literalinclude:: ../examples/isaacgym/ppo_ingenuity.py :language: python - :emphasize-lines: 2, 61-67 + :emphasize-lines: 2, 19, 56-62 .. tab:: Quadcopter @@ -423,7 +430,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_quadcopter.py :language: python - :emphasize-lines: 104 + :emphasize-lines: 95 .. tab:: ShadowHand @@ -431,7 +438,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_shadow_hand.py :language: python - :emphasize-lines: 108 + :emphasize-lines: 97 .. tab:: Trifinger @@ -439,7 +446,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/isaacgym/ppo_trifinger.py :language: python - :emphasize-lines: 108 + :emphasize-lines: 95 .. tab:: Isaac Gym environments (evaluation) @@ -582,13 +589,21 @@ These examples perform the training of an agent in the `Omniverse Isaac Gym envi The following components or practices are exemplified (highlighted): - - Load and wrap an Omniverse Isaac Gym environment: **AllegroHand**, **Ant**, **Cartpole**, **Humanoid**, **ShadowHand** + - Load and wrap an Omniverse Isaac Gym environment: **AllegroHand**, **Ant**, **Anymal** - Load and wrap an Omniverse Isaac Gym multi-threaded environment: **Ant (multi-threaded)**, **Cartpole (multi-threaded)** + - Set an input preprocessor: **AnymalTerrain**, **BallBalance** + - Set a random seed for reproducibility: **Cartpole**, **Crazyflie** + - Set a learning rate scheduler: **FrankaCabinet**, **Humanoid** + - Define a reward shaping function: **Ingenuity**, **Quadcopter**, **ShadowHand** The PPO agent configuration is mapped, as far as possible, from the rl_games' A2C-PPO `configuration for Omniverse Isaac Gym environments `_. The following list shows the mapping between the two configurations .. code-block:: bash + # memory + memory_size = horizon_length + + # agent rollouts = horizon_length learning_epochs = mini_epochs mini_batches = horizon_length * num_actors / minibatch_size @@ -608,6 +623,9 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 kl_threshold = 0 rewards_shaper = lambda rewards, timestep, timesteps: rewards * scale_value + # trainer + timesteps = horizon_length * max_epochs + .. note:: Omniverse Isaac Gym environments implement a functionality to get their configuration from the command line. Because of this feature, setting the :literal:`headless` option from the trainer configuration will not work. In this case, it is necessary to invoke the scripts as follows: :literal:`python script.py headless=True` @@ -624,7 +642,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_allegro_hand.py :language: python - :emphasize-lines: 11-12, 59-60 + :emphasize-lines: 11-12, 54-55 .. tab:: Ant @@ -632,7 +650,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_ant.py :language: python - :emphasize-lines: 11-12, 59-60 + :emphasize-lines: 11-12, 54-55 .. tab:: Ant (multi-threaded) @@ -640,7 +658,31 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_ant_mt.py :language: python - :emphasize-lines: 1, 13-14, 61-62, 126, 130 + :emphasize-lines: 1, 13-14, 56-57, 117, 121 + + .. 
tab:: Anymal + + :download:`ppo_anymal.py <../examples/omniisaacgym/ppo_anymal.py>` + + .. literalinclude:: ../examples/omniisaacgym/ppo_anymal.py + :language: python + :emphasize-lines: 11-12, 54-55 + + .. tab:: AnymalTerrain + + :download:`ppo_anymal_terrain.py <../examples/omniisaacgym/ppo_anymal_terrain.py>` + + .. literalinclude:: ../examples/omniisaacgym/ppo_anymal_terrain.py + :language: python + :emphasize-lines: 9, 99-102 + + .. tab:: BallBalance + + :download:`ppo_ball_balance.py <../examples/omniisaacgym/ppo_ball_balance.py>` + + .. literalinclude:: ../examples/omniisaacgym/ppo_ball_balance.py + :language: python + :emphasize-lines: 9, 94-97 .. tab:: Cartpole @@ -648,7 +690,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_cartpole.py :language: python - :emphasize-lines: 11-12, 55-56 + :emphasize-lines: 13, 17 .. tab:: Cartpole (multi-threaded) @@ -656,23 +698,55 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. literalinclude:: ../examples/omniisaacgym/ppo_cartpole_mt.py :language: python - :emphasize-lines: 1, 13-14, 57-58, 122, 126 - + :emphasize-lines: 1, 13-14, 54-55, 115, 119 + + .. tab:: Crazyflie + + :download:`ppo_crazy_flie.py <../examples/omniisaacgym/ppo_crazy_flie.py>` + + .. literalinclude:: ../examples/omniisaacgym/ppo_crazy_flie.py + :language: python + :emphasize-lines: 13, 17 + + .. tab:: FrankaCabinet + + :download:`ppo_franka_cabinet.py <../examples/omniisaacgym/ppo_franka_cabinet.py>` + + .. literalinclude:: ../examples/omniisaacgym/ppo_franka_cabinet.py + :language: python + :emphasize-lines: 8, 82-83 + .. tab:: Humanoid :download:`ppo_humanoid.py <../examples/omniisaacgym/ppo_humanoid.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_humanoid.py :language: python - :emphasize-lines: 11-12, 59-60 + :emphasize-lines: 8, 82-83 + .. tab:: Ingenuity + + :download:`ppo_ingenuity.py <../examples/omniisaacgym/ppo_ingenuity.py>` + + .. literalinclude:: ../examples/omniisaacgym/ppo_ingenuity.py + :language: python + :emphasize-lines: 93 + + .. tab:: Quadcopter + + :download:`ppo_quadcopter.py <../examples/omniisaacgym/ppo_quadcopter.py>` + + .. literalinclude:: ../examples/omniisaacgym/ppo_quadcopter.py + :language: python + :emphasize-lines: 93 + .. tab:: ShadowHand :download:`ppo_shadow_hand.py <../examples/omniisaacgym/ppo_shadow_hand.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_shadow_hand.py :language: python - :emphasize-lines: 11-12, 63-64 + :emphasize-lines: 95 .. 
raw:: html From 534faba9417fb79dc861be1b11129c19333279da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 2 Oct 2022 20:20:11 +0200 Subject: [PATCH 096/108] Add multivariate gaussian model to the table of agent models in docs --- docs/source/modules/skrl.agents.a2c.rst | 6 +++--- docs/source/modules/skrl.agents.amp.rst | 8 ++++---- docs/source/modules/skrl.agents.cem.rst | 4 ++-- docs/source/modules/skrl.agents.ddpg.rst | 10 +++++----- docs/source/modules/skrl.agents.ddqn.rst | 6 +++--- docs/source/modules/skrl.agents.dqn.rst | 6 +++--- docs/source/modules/skrl.agents.ppo.rst | 6 +++--- docs/source/modules/skrl.agents.q_learning.rst | 4 ++-- docs/source/modules/skrl.agents.sac.rst | 12 ++++++------ docs/source/modules/skrl.agents.sarsa.rst | 4 ++-- docs/source/modules/skrl.agents.td3.rst | 14 +++++++------- docs/source/modules/skrl.agents.trpo.rst | 6 +++--- 12 files changed, 43 insertions(+), 43 deletions(-) diff --git a/docs/source/modules/skrl.agents.a2c.rst b/docs/source/modules/skrl.agents.a2c.rst index f2d52e46..bcbf2a0e 100644 --- a/docs/source/modules/skrl.agents.a2c.rst +++ b/docs/source/modules/skrl.agents.a2c.rst @@ -108,21 +108,21 @@ The implementation uses 1 stochastic (discrete or continuous) and 1 deterministi * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\pi_\theta(s)` - Policy - :literal:`"policy"` - - :ref:`Categorical ` / :ref:`Gaussian ` - observation - action + - :ref:`Categorical ` / :ref:`Gaussian ` / :ref:`MultivariateGaussian ` * - :math:`V_\phi(s)` - Value - :literal:`"value"` - - :ref:`Deterministic ` - observation - 1 + - :ref:`Deterministic ` API ^^^ diff --git a/docs/source/modules/skrl.agents.amp.rst b/docs/source/modules/skrl.agents.amp.rst index 4ee3db5b..2c337b77 100644 --- a/docs/source/modules/skrl.agents.amp.rst +++ b/docs/source/modules/skrl.agents.amp.rst @@ -133,27 +133,27 @@ The implementation uses 1 stochastic (continuous) and 2 deterministic function a * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\pi_\theta(s)` - Policy - :literal:`"policy"` - - :ref:`Gaussian ` - observation - action + - :ref:`Gaussian ` / :ref:`MultivariateGaussian ` * - :math:`V_\phi(s)` - Value - :literal:`"value"` - - :ref:`Deterministic ` - observation - 1 + - :ref:`Deterministic ` * - :math:`D_\psi(s_{_{AMP}})` - Discriminator - :literal:`"discriminator"` - - :ref:`Deterministic ` - AMP observation - 1 + - :ref:`Deterministic ` API ^^^ diff --git a/docs/source/modules/skrl.agents.cem.rst b/docs/source/modules/skrl.agents.cem.rst index 6ef63e4c..0a774635 100644 --- a/docs/source/modules/skrl.agents.cem.rst +++ b/docs/source/modules/skrl.agents.cem.rst @@ -64,15 +64,15 @@ The implementation uses 1 discrete function approximator. This function approxim * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\pi(s)` - Policy - :literal:`"policy"` - - :ref:`Categorical ` - observation - action + - :ref:`Categorical ` API ^^^ diff --git a/docs/source/modules/skrl.agents.ddpg.rst b/docs/source/modules/skrl.agents.ddpg.rst index ae57fc80..f7547713 100644 --- a/docs/source/modules/skrl.agents.ddpg.rst +++ b/docs/source/modules/skrl.agents.ddpg.rst @@ -92,33 +92,33 @@ The implementation uses 4 deterministic function approximators. 
These function a * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\mu_\theta(s)` - Policy (actor) - :literal:`"policy"` - - :ref:`Deterministic ` - observation - action + - :ref:`Deterministic ` * - :math:`\mu_{\theta_{target}}(s)` - Target policy - :literal:`"target_policy"` - - :ref:`Deterministic ` - observation - action + - :ref:`Deterministic ` * - :math:`Q_\phi(s, a)` - Q-network (critic) - :literal:`"critic"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` * - :math:`Q_{\phi_{target}}(s, a)` - Target Q-network - :literal:`"target_critic"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` API ^^^ diff --git a/docs/source/modules/skrl.agents.ddqn.rst b/docs/source/modules/skrl.agents.ddqn.rst index f12ac7c1..4af5ea0f 100644 --- a/docs/source/modules/skrl.agents.ddqn.rst +++ b/docs/source/modules/skrl.agents.ddqn.rst @@ -71,21 +71,21 @@ The implementation uses 2 deterministic function approximators. These function a * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`Q_\phi(s, a)` - Q-network - :literal:`"q_network"` - - :ref:`Deterministic ` - observation - action + - :ref:`Deterministic ` * - :math:`Q_{\phi_{target}}(s, a)` - Target Q-network - :literal:`"target_q_network"` - - :ref:`Deterministic ` - observation - action + - :ref:`Deterministic ` API ^^^ diff --git a/docs/source/modules/skrl.agents.dqn.rst b/docs/source/modules/skrl.agents.dqn.rst index 0ed111f8..200baf9a 100644 --- a/docs/source/modules/skrl.agents.dqn.rst +++ b/docs/source/modules/skrl.agents.dqn.rst @@ -71,21 +71,21 @@ The implementation uses 2 deterministic function approximators. These function a * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`Q_\phi(s, a)` - Q-network - :literal:`"q_network"` - - :ref:`Deterministic ` - observation - action + - :ref:`Deterministic ` * - :math:`Q_{\phi_{target}}(s, a)` - Target Q-network - :literal:`"target_q_network"` - - :ref:`Deterministic ` - observation - action + - :ref:`Deterministic ` API ^^^ diff --git a/docs/source/modules/skrl.agents.ppo.rst b/docs/source/modules/skrl.agents.ppo.rst index 8be4f1f1..11fdb703 100644 --- a/docs/source/modules/skrl.agents.ppo.rst +++ b/docs/source/modules/skrl.agents.ppo.rst @@ -124,21 +124,21 @@ The implementation uses 1 stochastic (discrete or continuous) and 1 deterministi * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\pi_\theta(s)` - Policy - :literal:`"policy"` - - :ref:`Categorical ` / :ref:`Gaussian ` - observation - action + - :ref:`Categorical ` / :ref:`Gaussian ` / :ref:`MultivariateGaussian ` * - :math:`V_\phi(s)` - Value - :literal:`"value"` - - :ref:`Deterministic ` - observation - 1 + - :ref:`Deterministic ` API ^^^ diff --git a/docs/source/modules/skrl.agents.q_learning.rst b/docs/source/modules/skrl.agents.q_learning.rst index 9b270188..2ea947f6 100644 --- a/docs/source/modules/skrl.agents.q_learning.rst +++ b/docs/source/modules/skrl.agents.q_learning.rst @@ -62,15 +62,15 @@ The implementation uses 1 table. 
This table (model) must be collected in a dicti * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\pi_{Q[s,a]}(s)` - Policy (:math:`\epsilon`-greedy) - :literal:`"policy"` - - :ref:`Tabular ` - observation - action + - :ref:`Tabular ` API ^^^ diff --git a/docs/source/modules/skrl.agents.sac.rst b/docs/source/modules/skrl.agents.sac.rst index 20dbe252..0af74b70 100644 --- a/docs/source/modules/skrl.agents.sac.rst +++ b/docs/source/modules/skrl.agents.sac.rst @@ -99,39 +99,39 @@ The implementation uses 1 stochastic and 4 deterministic function approximators. * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\pi_\theta(s)` - Policy (actor) - :literal:`"policy"` - - :ref:`Gaussian ` - observation - action + - :ref:`Gaussian ` / :ref:`MultivariateGaussian ` * - :math:`Q_{\phi 1}(s, a)` - Q1-network (critic 1) - :literal:`"critic_1"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` * - :math:`Q_{\phi 2}(s, a)` - Q2-network (critic 2) - :literal:`"critic_2"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` * - :math:`Q_{{\phi 1}_{target}}(s, a)` - Target Q1-network - :literal:`"target_critic_1"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` * - :math:`Q_{{\phi 2}_{target}}(s, a)` - Target Q2-network - :literal:`"target_critic_2"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` API ^^^ diff --git a/docs/source/modules/skrl.agents.sarsa.rst b/docs/source/modules/skrl.agents.sarsa.rst index dacfb320..9bb4f23f 100644 --- a/docs/source/modules/skrl.agents.sarsa.rst +++ b/docs/source/modules/skrl.agents.sarsa.rst @@ -61,15 +61,15 @@ The implementation uses 1 table. This table (model) must be collected in a dicti * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\pi_{Q[s,a]}(s)` - Policy (:math:`\epsilon`-greedy) - :literal:`"policy"` - - :ref:`Tabular ` - observation - action + - :ref:`Tabular ` API ^^^ diff --git a/docs/source/modules/skrl.agents.td3.rst b/docs/source/modules/skrl.agents.td3.rst index 74bb068e..eb30fb80 100644 --- a/docs/source/modules/skrl.agents.td3.rst +++ b/docs/source/modules/skrl.agents.td3.rst @@ -102,45 +102,45 @@ The implementation uses 6 deterministic function approximators. 
These function a * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\mu_\theta(s)` - Policy (actor) - :literal:`"policy"` - - :ref:`Deterministic ` - observation - action + - :ref:`Deterministic ` * - :math:`\mu_{\theta_{target}}(s)` - Target policy - :literal:`"target_policy"` - - :ref:`Deterministic ` - observation - action + - :ref:`Deterministic ` * - :math:`Q_{\phi 1}(s, a)` - Q1-network (critic 1) - :literal:`"critic_1"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` * - :math:`Q_{\phi 2}(s, a)` - Q2-network (critic 2) - :literal:`"critic_2"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` * - :math:`Q_{{\phi 1}_{target}}(s, a)` - Target Q1-network - :literal:`"target_critic_1"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` * - :math:`Q_{{\phi 2}_{target}}(s, a)` - Target Q2-network - :literal:`"target_critic_2"` - - :ref:`Deterministic ` - observation + action - 1 + - :ref:`Deterministic ` API ^^^ diff --git a/docs/source/modules/skrl.agents.trpo.rst b/docs/source/modules/skrl.agents.trpo.rst index 52da1555..bd2b0ad6 100644 --- a/docs/source/modules/skrl.agents.trpo.rst +++ b/docs/source/modules/skrl.agents.trpo.rst @@ -162,21 +162,21 @@ The implementation uses 1 stochastic and 1 deterministic function approximator. * - Notation - Concept - Key - - Type - Input shape - Output shape + - Type * - :math:`\pi_\theta(s)` - Policy - :literal:`"policy"` - - :ref:`Gaussian ` - observation - action + - :ref:`Gaussian ` / :ref:`MultivariateGaussian ` * - :math:`V_\phi(s)` - Value - :literal:`"value"` - - :ref:`Deterministic ` - observation - 1 + - :ref:`Deterministic ` API ^^^ From 539d62836ca76a4d7b984cdfb3149c6cf4b80d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 2 Oct 2022 20:22:52 +0200 Subject: [PATCH 097/108] Add a short introduction to each model mixin in docs --- .../source/modules/skrl.models.base_class.rst | 2 ++ .../modules/skrl.models.categorical.rst | 24 +++++++++++++++++ .../modules/skrl.models.deterministic.rst | 24 +++++++++++++++++ docs/source/modules/skrl.models.gaussian.rst | 26 +++++++++++++++++++ .../skrl.models.multivariate_gaussian.rst | 26 +++++++++++++++++++ docs/source/modules/skrl.models.tabular.rst | 24 +++++++++++++++++ 6 files changed, 126 insertions(+) diff --git a/docs/source/modules/skrl.models.base_class.rst b/docs/source/modules/skrl.models.base_class.rst index 221221f3..8422448e 100644 --- a/docs/source/modules/skrl.models.base_class.rst +++ b/docs/source/modules/skrl.models.base_class.rst @@ -1,3 +1,5 @@ +.. _models_base_class: + Base class ========== diff --git a/docs/source/modules/skrl.models.categorical.rst b/docs/source/modules/skrl.models.categorical.rst index 66c075c9..6dc185c2 100644 --- a/docs/source/modules/skrl.models.categorical.rst +++ b/docs/source/modules/skrl.models.categorical.rst @@ -3,6 +3,30 @@ Categorical model ================= +Categorical models run **discrete-domain stochastic** policies. + +skrl provides a Python mixin (:literal:`CategoricalMixin`) to assist in the creation of these types of models, allowing users to have full control over the function approximator definitions and architectures. Note that the use of this mixin must comply with the following rules: + +* The definition of multiple inheritance must always include the :ref:`Model ` base class at the end. + + .. 
code-block:: python + :emphasize-lines: 1 + + class CategoricalModel(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + +* The :ref:`Model ` base class constructor must be invoked before the mixins constructor. + + .. code-block:: python + :emphasize-lines: 3-4 + + class CategoricalModel(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + Concept ------- diff --git a/docs/source/modules/skrl.models.deterministic.rst b/docs/source/modules/skrl.models.deterministic.rst index 04266a88..bccbf334 100644 --- a/docs/source/modules/skrl.models.deterministic.rst +++ b/docs/source/modules/skrl.models.deterministic.rst @@ -3,6 +3,30 @@ Deterministic model =================== +Deterministic models run **continuous-domain deterministic** policies. + +skrl provides a Python mixin (:literal:`DeterministicMixin`) to assist in the creation of these types of models, allowing users to have full control over the function approximator definitions and architectures. Note that the use of this mixin must comply with the following rules: + +* The definition of multiple inheritance must always include the :ref:`Model ` base class at the end. + + .. code-block:: python + :emphasize-lines: 1 + + class DeterministicModel(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + +* The :ref:`Model ` base class constructor must be invoked before the mixins constructor. + + .. code-block:: python + :emphasize-lines: 3-4 + + class DeterministicModel(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + Concept ------- diff --git a/docs/source/modules/skrl.models.gaussian.rst b/docs/source/modules/skrl.models.gaussian.rst index 7a8b788b..0b7dd56a 100644 --- a/docs/source/modules/skrl.models.gaussian.rst +++ b/docs/source/modules/skrl.models.gaussian.rst @@ -3,6 +3,32 @@ Gaussian model ============== +Gaussian models run **continuous-domain stochastic** policies. + +skrl provides a Python mixin (:literal:`GaussianMixin`) to assist in the creation of these types of models, allowing users to have full control over the function approximator definitions and architectures. Note that the use of this mixin must comply with the following rules: + +* The definition of multiple inheritance must always include the :ref:`Model ` base class at the end. + + .. code-block:: python + :emphasize-lines: 1 + + class GaussianModel(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + +* The :ref:`Model ` base class constructor must be invoked before the mixins constructor. + + .. 
code-block:: python + :emphasize-lines: 4-5 + + class GaussianModel(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + Concept ------- diff --git a/docs/source/modules/skrl.models.multivariate_gaussian.rst b/docs/source/modules/skrl.models.multivariate_gaussian.rst index c4a20066..13661349 100644 --- a/docs/source/modules/skrl.models.multivariate_gaussian.rst +++ b/docs/source/modules/skrl.models.multivariate_gaussian.rst @@ -3,6 +3,32 @@ Multivariate Gaussian model =========================== +Multivariate Gaussian models run **continuous-domain stochastic** policies. + +skrl provides a Python mixin (:literal:`MultivariateGaussianMixin`) to assist in the creation of these types of models, allowing users to have full control over the function approximator definitions and architectures. Note that the use of this mixin must comply with the following rules: + +* The definition of multiple inheritance must always include the :ref:`Model ` base class at the end. + + .. code-block:: python + :emphasize-lines: 1 + + class MultivariateGaussianModel(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + +* The :ref:`Model ` base class constructor must be invoked before the mixins constructor. + + .. code-block:: python + :emphasize-lines: 4-5 + + class MultivariateGaussianModel(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + Concept ------- diff --git a/docs/source/modules/skrl.models.tabular.rst b/docs/source/modules/skrl.models.tabular.rst index 774b338a..1190c245 100644 --- a/docs/source/modules/skrl.models.tabular.rst +++ b/docs/source/modules/skrl.models.tabular.rst @@ -3,6 +3,30 @@ Tabular model ============= +Tabular models run **discrete-domain deterministic/stochastic** policies. + +skrl provides a Python mixin (:literal:`TabularMixin`) to assist in the creation of these types of models, allowing users to have full control over the table definitions. Note that the use of this mixin must comply with the following rules: + +* The definition of multiple inheritance must always include the :ref:`Model ` base class at the end. + + .. code-block:: python + :emphasize-lines: 1 + + class TabularModel(TabularMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", num_envs=1): + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) + +* The :ref:`Model ` base class constructor must be invoked before the mixins constructor. + + .. 
code-block:: python + :emphasize-lines: 3-4 + + class TabularModel(TabularMixin, Model): + def __init__(self, observation_space, action_space, device="cuda:0", num_envs=1): + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) + Basic usage ----------- From e096609512ac61a6b949ad9aaf4c85d7d8b5ba14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 2 Oct 2022 23:22:24 +0200 Subject: [PATCH 098/108] Add shared models to docs --- docs/source/index.rst | 5 ++- .../modules/skrl.models.shared_model.rst | 41 +++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 docs/source/modules/skrl.models.shared_model.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index d48d8ce9..52ca475d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -12,7 +12,7 @@ SKRL - Reinforcement Learning library (|version|) .. warning:: - **skrl** is under **active continuous development**. Make sure you always have the latest version + **skrl** is under **active continuous development**. Make sure you always have the latest version. Visit the `develop `_ branch or its `documentation `_ to access the latest updates to be released. | **GitHub repository:** https://github.com/Toni-SM/skrl | **Questions or discussions:** https://github.com/Toni-SM/skrl/discussions @@ -123,7 +123,7 @@ Memories Models ^^^^^^ - Definition of helper classes for the construction of tabular functions or function approximators using artificial neural networks. This library does not provide predefined policies but helper classes to create discrete and continuous (stochastic or deterministic) policies in which the user only has to define the tables (tensors) or artificial neural networks. All models inherit from one :doc:`base class ` that defines a uniform interface and provides for common functionalities + Definition of helper mixins for the construction of tabular functions or function approximators using artificial neural networks. This library does not provide predefined policies but helper mixins to create discrete and continuous (stochastic or deterministic) policies in which the user only has to define the tables (tensors) or artificial neural networks. All models inherit from one :doc:`base class ` that defines a uniform interface and provides for common functionalities. In addition, it is possible to create :doc:`shared model ` by combining the implemented definitions * :doc:`Tabular model ` (discrete domain) * :doc:`Categorical model ` (discrete domain) @@ -142,6 +142,7 @@ Models modules/skrl.models.gaussian modules/skrl.models.multivariate_gaussian modules/skrl.models.deterministic + modules/skrl.models.shared_model Trainers ^^^^^^^^ diff --git a/docs/source/modules/skrl.models.shared_model.rst b/docs/source/modules/skrl.models.shared_model.rst new file mode 100644 index 00000000..fe16afcf --- /dev/null +++ b/docs/source/modules/skrl.models.shared_model.rst @@ -0,0 +1,41 @@ +Shared model +============ + +Sometimes it is desirable to define models that use shared layers or network to represent multiple function approximators. This practice, known as *shared parameters* (or *parameter sharing*), *shared layers*, *shared model*, *shared networks* or *joint architecture* among others, is typically justified by the following criteria: + +* Learning the same characteristics, especially when processing large inputs (such as images, e.g.). + +* Reduce the number of parameters in the whole system. 
+ +* Make the computation more efficient. + +Implementation +-------------- + +By combining the implemented mixins, it is possible to define shared models with skrl. In these cases, the use of the :literal:`role` argument (a Python string) is relevant. The agents will call the models by setting the :literal:`role` argument according to their requirements. Visit each agent's documentation (*Key* column of the table under *Spaces and models* section) to know the possible values that this parameter can take. + +The code snippet below shows how to define a shared model. The following practices for building shared models can be identified: + +* The definition of multiple inheritance must always include the :ref:`Model ` base class at the end. + +* The :ref:`Model ` base class constructor must be invoked before the mixin constructors. + +* All mixin constructors must be invoked. + + * Specifying the :literal:`role` argument is optional if all constructors belong to different mixins. + + * If multiple models of the same mixin type are required, the same constructor must be invoked as many times as needed. To do so, it is mandatory to specify the :literal:`role` argument. + +* The :literal:`.act(...)` method needs to be overridden to disambiguate its call. + +* The same instance of the shared model must be passed to all keys involved. + +.. raw:: html + +
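+
+The following is a minimal usage sketch (it assumes the :literal:`SharedModel` class defined in the snippet included below, an already wrapped environment :literal:`env` and a batch of observations :literal:`states`):
+
+.. code-block:: python
+
+    # instantiate the model once; the same instance is passed to all the keys involved
+    model = SharedModel(env.observation_space, env.action_space, env.device)
+
+    # the role argument determines which mixin handles the call
+    policy_output = model.act(states, None, role="policy")   # handled by GaussianMixin.act
+    value_output = model.act(states, None, role="value")     # handled by DeterministicMixin.act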
+ +.. literalinclude:: ../snippets/shared_model.py + :language: python + :linenos: + :start-after: [start-mlp] + :end-before: [end-mlp] From c52560f12d1a02d5d747f0025f39139a3baeecf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 2 Oct 2022 23:32:05 +0200 Subject: [PATCH 099/108] Add benchmark results link to docs --- docs/source/intro/examples.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index ffeb0791..7348ee4d 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -296,7 +296,7 @@ The following components or practices are exemplified (highlighted): - Access to environment-specific properties and methods: **Humanoid (AMP)** - Load a checkpoint during evaluation: **Cartpole** -The PPO agent configuration is mapped, as far as possible, from the rl_games' A2C-PPO `configuration for Isaac Gym preview environments `_. The following list shows the mapping between the two configurations +The PPO agent configuration is mapped, as far as possible, from the rl_games' A2C-PPO `configuration for Isaac Gym preview environments `_. Shared models or separated models are used depending on the value of the :literal:`network.separate` variable. The following list shows the mapping between the two configurations: .. code-block:: bash @@ -326,6 +326,8 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 # trainer timesteps = horizon_length * max_epochs +**Benchmark results** for Isaac Gym are listed in `Benchmark results #32 `_. + .. note:: Isaac Gym environments implement a functionality to get their configuration from the command line. Because of this feature, setting the :literal:`headless` option from the trainer configuration will not work. In this case, it is necessary to invoke the scripts as follows: :literal:`python script.py headless=True` for Isaac Gym environments (preview 3 and preview 4) or :literal:`python script.py --headless` for Isaac Gym environments (preview 2) @@ -596,7 +598,7 @@ The following components or practices are exemplified (highlighted): - Set a learning rate scheduler: **FrankaCabinet**, **Humanoid** - Define a reward shaping function: **Ingenuity**, **Quadcopter**, **ShadowHand** -The PPO agent configuration is mapped, as far as possible, from the rl_games' A2C-PPO `configuration for Omniverse Isaac Gym environments `_. The following list shows the mapping between the two configurations +The PPO agent configuration is mapped, as far as possible, from the rl_games' A2C-PPO `configuration for Omniverse Isaac Gym environments `_. Shared models or separated models are used depending on the value of the :literal:`network.separate` variable. The following list shows the mapping between the two configurations:configurations .. code-block:: bash @@ -626,6 +628,8 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 # trainer timesteps = horizon_length * max_epochs +**Benchmark results** for Omniverse Isaac Gym are listed in `Benchmark results #32 `_. + .. note:: Omniverse Isaac Gym environments implement a functionality to get their configuration from the command line. Because of this feature, setting the :literal:`headless` option from the trainer configuration will not work. 
In this case, it is necessary to invoke the scripts as follows: :literal:`python script.py headless=True` From ceffaf89161eacfd3029d6a85f555f26231219b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 2 Oct 2022 23:33:22 +0200 Subject: [PATCH 100/108] Add shared model snippet to docs --- docs/source/snippets/shared_model.py | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 docs/source/snippets/shared_model.py diff --git a/docs/source/snippets/shared_model.py b/docs/source/snippets/shared_model.py new file mode 100644 index 00000000..40182f61 --- /dev/null +++ b/docs/source/snippets/shared_model.py @@ -0,0 +1,48 @@ +# [start-mlp] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin + + +# define the shared model +class SharedModel(GaussianMixin, DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction, role="policy") + DeterministicMixin.__init__(self, clip_actions, role="value") + + # shared layers/network + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), + nn.ELU(), + nn.Linear(32, 32), + nn.ELU()) + + # separated layers ("policy") + self.mean_layer = nn.Linear(32, self.num_actions) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + # separated layer ("value") + self.value_layer = nn.Linear(32, 1) + + # override the .act(...) method to disambiguate its call + def act(self, states, taken_actions, role): + if role == "policy": + return GaussianMixin.act(self, states, taken_actions, role) + elif role == "value": + return DeterministicMixin.act(self, states, taken_actions, role) + + # forward the input to compute model output according to the specified role + def compute(self, states, taken_actions, role): + if role == "policy": + return self.mean_layer(self.net(states)), self.log_std_parameter + elif role == "value": + return self.value_layer(self.net(states)) + + +# instantiate the shared model and pass the same instance to the other key +models = {} +models["policy"] = SharedModel(env.observation_space, env.action_space, env.device) +models["value"] = models["policy"] +# [end-mlp] From c326041cbe557f274271f77ce504ba7ea9e740de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 2 Oct 2022 23:36:17 +0200 Subject: [PATCH 101/108] Disable gradient computation when tensors are updated by their names --- skrl/memories/torch/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index 7765854a..aaa18ab5 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py @@ -139,7 +139,8 @@ def set_tensor_by_name(self, name: str, tensor: torch.Tensor) -> None: :raises KeyError: The tensor does not exist """ - self.tensors[name].copy_(tensor) + with torch.no_grad(): + self.tensors[name].copy_(tensor) def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space], dtype: Union[torch.dtype, None] = None) -> bool: """Create a new internal tensor in memory From a46baa321856a1ad9746e31535859cc36f6d6ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 2 Oct 2022 23:39:44 +0200 Subject: [PATCH 
102/108] Fix docstring example --- skrl/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skrl/utils/__init__.py b/skrl/utils/__init__.py index 778ef728..aaef12ab 100644 --- a/skrl/utils/__init__.py +++ b/skrl/utils/__init__.py @@ -32,8 +32,8 @@ def set_seed(seed: Optional[int] = None, deterministic: bool = False) -> int: 42 # random seed - >>> set_seed() >>> from skrl.utils import set_seed + >>> set_seed() [skrl:INFO] Seed: 1776118066 1776118066 From d0470fc8d31cea02b2fc4e4b25a5e30c7648fe38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 2 Oct 2022 23:54:04 +0200 Subject: [PATCH 103/108] Add Isaac Gym Franka Emika example to docs --- .../reaching_franka_isaacgym_env.py | 364 ++++++++++++++++++ .../reaching_franka_isaacgym_skrl_eval.py | 89 +++++ .../reaching_franka_isaacgym_skrl_train.py | 130 +++++++ 3 files changed, 583 insertions(+) create mode 100644 docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_env.py create mode 100644 docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py create mode 100644 docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_env.py new file mode 100644 index 00000000..b4a19b7a --- /dev/null +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_env.py @@ -0,0 +1,364 @@ +import os +import numpy as np +import torch + +from isaacgym import gymtorch, gymapi + +# isaacgymenvs (VecTask class) +import sys +import isaacgymenvs +sys.path.append(list(isaacgymenvs.__path__)[0]) +from tasks.base.vec_task import VecTask + +from skrl.utils import isaacgym_utils + + +TASK_CFG = {"name": "ReachingFranka", + "physics_engine": "physx", + "rl_device": "cuda:0", + "sim_device": "cuda:0", + "graphics_device_id": 0, + "headless": False, + "virtual_screen_capture": False, + "force_render": True, + "env": {"numEnvs": 1024, + "envSpacing": 1.5, + "episodeLength": 100, + "enableDebugVis": False, + "clipObservations": 1000.0, + "clipActions": 1.0, + "controlFrequencyInv": 4, + "actionScale": 2.5, + "dofVelocityScale": 0.1, + "controlSpace": "cartesian", + "enableCameraSensors": False}, + "sim": {"dt": 0.0083, # 1 / 120 + "substeps": 1, + "up_axis": "z", + "use_gpu_pipeline": True, + "gravity": [0.0, 0.0, -9.81], + "physx": {"num_threads": 4, + "solver_type": 1, + "use_gpu": True, + "num_position_iterations": 4, + "num_velocity_iterations": 1, + "contact_offset": 0.005, + "rest_offset": 0.0, + "bounce_threshold_velocity": 0.2, + "max_depenetration_velocity": 1000.0, + "default_buffer_size_multiplier": 5.0, + "max_gpu_contact_pairs": 1048576, + "num_subscenes": 4, + "contact_collection": 0}}, + "task": {"randomize": False}} + + +class ReachingFrankaTask(VecTask): + def __init__(self, cfg): + self.cfg = cfg + rl_device = cfg["rl_device"] + sim_device = cfg["sim_device"] + graphics_device_id = cfg["graphics_device_id"] + headless = cfg["headless"] + virtual_screen_capture = cfg["virtual_screen_capture"] + force_render = cfg["force_render"] + + self.dt = 1 / 120.0 + + self._action_scale = self.cfg["env"]["actionScale"] + self._dof_vel_scale = self.cfg["env"]["dofVelocityScale"] + self._control_space = self.cfg["env"]["controlSpace"] + self.max_episode_length = self.cfg["env"]["episodeLength"] # name required for VecTask + + 
self.debug_viz = self.cfg["env"]["enableDebugVis"] + + # observation and action space + self.cfg["env"]["numObservations"] = 18 + if self._control_space == "joint": + self.cfg["env"]["numActions"] = 7 + elif self._control_space == "cartesian": + self.cfg["env"]["numActions"] = 3 + else: + raise ValueError("Invalid control space: {}".format(self._control_space)) + + self._end_effector_link = "panda_leftfinger" + + # setup VecTask + super().__init__(config=self.cfg, + rl_device=rl_device, + sim_device=sim_device, + graphics_device_id=graphics_device_id, + headless=headless, + virtual_screen_capture=virtual_screen_capture, + force_render=force_render) + + # tensors and views: DOFs, roots, rigid bodies + dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) + root_state_tensor = self.gym.acquire_actor_root_state_tensor(self.sim) + rigid_body_state_tensor = self.gym.acquire_rigid_body_state_tensor(self.sim) + + self.gym.refresh_dof_state_tensor(self.sim) + self.gym.refresh_actor_root_state_tensor(self.sim) + self.gym.refresh_rigid_body_state_tensor(self.sim) + + self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) + self.root_state = gymtorch.wrap_tensor(root_state_tensor) + self.rigid_body_state = gymtorch.wrap_tensor(rigid_body_state_tensor) + + self.dof_pos = self.dof_state.view(self.num_envs, -1, 2)[..., 0] + self.dof_vel = self.dof_state.view(self.num_envs, -1, 2)[..., 1] + + self.root_pos = self.root_state[:, 0:3].view(self.num_envs, -1, 3) + self.root_rot = self.root_state[:, 3:7].view(self.num_envs, -1, 4) + self.root_vel_lin = self.root_state[:, 7:10].view(self.num_envs, -1, 3) + self.root_vel_ang = self.root_state[:, 10:13].view(self.num_envs, -1, 3) + + self.rigid_body_pos = self.rigid_body_state[:, 0:3].view(self.num_envs, -1, 3) + self.rigid_body_rot = self.rigid_body_state[:, 3:7].view(self.num_envs, -1, 4) + self.rigid_body_vel_lin = self.rigid_body_state[:, 7:10].view(self.num_envs, -1, 3) + self.rigid_body_vel_ang = self.rigid_body_state[:, 10:13].view(self.num_envs, -1, 3) + + # tensors and views: jacobian + if self._control_space == "cartesian": + jacobian_tensor = self.gym.acquire_jacobian_tensor(self.sim, "robot") + self.jacobian = gymtorch.wrap_tensor(jacobian_tensor) + self.jacobian_end_effector = self.jacobian[:, self.rigid_body_dict_robot[self._end_effector_link] - 1, :, :7] + + self.reset_idx(torch.arange(self.num_envs, device=self.device)) + + def create_sim(self): + self.sim_params.up_axis = gymapi.UP_AXIS_Z + self.sim_params.gravity.x = 0 + self.sim_params.gravity.y = 0 + self.sim_params.gravity.z = -9.81 + self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) + self._create_ground_plane() + self._create_envs(self.num_envs, self.cfg["env"]["envSpacing"], int(np.sqrt(self.num_envs))) + + def _create_ground_plane(self): + plane_params = gymapi.PlaneParams() + plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) + self.gym.add_ground(self.sim, plane_params) + + def _create_envs(self, num_envs, spacing, num_per_row): + lower = gymapi.Vec3(-spacing, -spacing, 0.0) + upper = gymapi.Vec3(spacing, spacing, spacing) + + asset_root = os.path.join(os.path.dirname(os.path.abspath(isaacgymenvs.__file__)), "../assets") + robot_asset_file = "urdf/franka_description/robots/franka_panda.urdf" + + # robot asset + asset_options = gymapi.AssetOptions() + asset_options.flip_visual_attachments = True + asset_options.fix_base_link = True + asset_options.collapse_fixed_joints = True + asset_options.disable_gravity = True + 
asset_options.thickness = 0.001 + asset_options.default_dof_drive_mode = gymapi.DOF_MODE_POS + asset_options.use_mesh_materials = True + robot_asset = self.gym.load_asset(self.sim, asset_root, robot_asset_file, asset_options) + + # target asset + asset_options = gymapi.AssetOptions() + asset_options.fix_base_link = True + asset_options.collapse_fixed_joints = False + asset_options.disable_gravity = True + asset_options.thickness = 0.001 + asset_options.use_mesh_materials = True + target_asset = self.gym.create_sphere(self.sim, 0.025, asset_options) + + robot_dof_stiffness = torch.tensor([400, 400, 400, 400, 400, 400, 400, 1.0e6, 1.0e6], dtype=torch.float32, device=self.device) + robot_dof_damping = torch.tensor([80, 80, 80, 80, 80, 80, 80, 1.0e2, 1.0e2], dtype=torch.float, device=self.device) + + # set robot dof properties + robot_dof_props = self.gym.get_asset_dof_properties(robot_asset) + self.robot_dof_lower_limits = [] + self.robot_dof_upper_limits = [] + for i in range(9): + robot_dof_props["driveMode"][i] = gymapi.DOF_MODE_POS + if self.physics_engine == gymapi.SIM_PHYSX: + robot_dof_props["stiffness"][i] = robot_dof_stiffness[i] + robot_dof_props["damping"][i] = robot_dof_damping[i] + else: + robot_dof_props["stiffness"][i] = 7000.0 + robot_dof_props["damping"][i] = 50.0 + + self.robot_dof_lower_limits.append(robot_dof_props["lower"][i]) + self.robot_dof_upper_limits.append(robot_dof_props["upper"][i]) + + self.robot_dof_lower_limits = torch.tensor(self.robot_dof_lower_limits, device=self.device) + self.robot_dof_upper_limits = torch.tensor(self.robot_dof_upper_limits, device=self.device) + self.robot_dof_speed_scales = torch.ones_like(self.robot_dof_lower_limits) + robot_dof_props["effort"][7] = 200 + robot_dof_props["effort"][8] = 200 + + self.handle_targets = [] + self.handle_robots = [] + self.handle_envs = [] + + indexes_sim_robot = [] + indexes_sim_target = [] + + for i in range(self.num_envs): + # create env instance + env_ptr = self.gym.create_env(self.sim, lower, upper, num_per_row) + + # create robot instance + pose = gymapi.Transform() + pose.p = gymapi.Vec3(0.0, 0.0, 0.0) + pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1) + + robot_actor = self.gym.create_actor(env=env_ptr, + asset=robot_asset, + pose=pose, + name="robot", + group=i, # collision group + filter=1, # mask off collision + segmentationId=0) + self.gym.set_actor_dof_properties(env_ptr, robot_actor, robot_dof_props) + indexes_sim_robot.append(self.gym.get_actor_index(env_ptr, robot_actor, gymapi.DOMAIN_SIM)) + + # create target instance + pose = gymapi.Transform() + pose.p = gymapi.Vec3(0.5, 0.0, 0.2) + pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1) + + target_actor = self.gym.create_actor(env=env_ptr, + asset=target_asset, + pose=pose, + name="target", + group=i + 1, # collision group + filter=1, # mask off collision + segmentationId=1) + indexes_sim_target.append(self.gym.get_actor_index(env_ptr, target_actor, gymapi.DOMAIN_SIM)) + + self.gym.set_rigid_body_color(env_ptr, target_actor, 0, gymapi.MESH_VISUAL, gymapi.Vec3(1., 0., 0.)) + + self.handle_envs.append(env_ptr) + self.handle_robots.append(robot_actor) + self.handle_targets.append(target_actor) + + self.indexes_sim_robot = torch.tensor(indexes_sim_robot, dtype=torch.int32, device=self.device) + self.indexes_sim_target = torch.tensor(indexes_sim_target, dtype=torch.int32, device=self.device) + + self.num_robot_dofs = self.gym.get_asset_dof_count(robot_asset) + self.rigid_body_dict_robot = self.gym.get_asset_rigid_body_dict(robot_asset) + + self.init_data() + + def 
init_data(self): + self.robot_default_dof_pos = torch.tensor(np.radians([0, -45, 0, -135, 0, 90, 45, 0, 0]), device=self.device, dtype=torch.float32) + self.robot_dof_targets = torch.zeros((self.num_envs, self.num_robot_dofs), device=self.device, dtype=torch.float32) + + if self._control_space == "cartesian": + self.end_effector_pos = torch.zeros((self.num_envs, 3), device=self.device) + self.end_effector_rot = torch.zeros((self.num_envs, 4), device=self.device) + + def compute_reward(self): + self.rew_buf[:] = -self._computed_distance + + self.reset_buf.fill_(0) + # target reached + self.reset_buf = torch.where(self._computed_distance <= 0.035, torch.ones_like(self.reset_buf), self.reset_buf) + # max episode length + self.reset_buf = torch.where(self.progress_buf >= self.max_episode_length - 1, torch.ones_like(self.reset_buf), self.reset_buf) + + # double restart correction (why?, is it necessary?) + self.rew_buf = torch.where(self.progress_buf == 0, -0.75 * torch.ones_like(self.reset_buf), self.rew_buf) + self.reset_buf = torch.where(self.progress_buf == 0, torch.zeros_like(self.reset_buf), self.reset_buf) + + def compute_observations(self): + self.gym.refresh_dof_state_tensor(self.sim) + self.gym.refresh_actor_root_state_tensor(self.sim) + self.gym.refresh_rigid_body_state_tensor(self.sim) + + if self._control_space == "cartesian": + self.gym.refresh_jacobian_tensors(self.sim) + + robot_dof_pos = self.dof_pos + robot_dof_vel = self.dof_vel + self.end_effector_pos = self.rigid_body_pos[:, self.rigid_body_dict_robot[self._end_effector_link]] + self.end_effector_rot = self.rigid_body_rot[:, self.rigid_body_dict_robot[self._end_effector_link]] + target_pos = self.root_pos[:, 1] + target_rot = self.root_rot[:, 1] + + dof_pos_scaled = 2.0 * (robot_dof_pos - self.robot_dof_lower_limits) \ + / (self.robot_dof_upper_limits - self.robot_dof_lower_limits) - 1.0 + dof_vel_scaled = robot_dof_vel * self._dof_vel_scale + + generalization_noise = torch.rand((dof_vel_scaled.shape[0], 7), device=self.device) + 0.5 + + self.obs_buf[:, 0] = self.progress_buf / self.max_episode_length + self.obs_buf[:, 1:8] = dof_pos_scaled[:, :7] + self.obs_buf[:, 8:15] = dof_vel_scaled[:, :7] * generalization_noise + self.obs_buf[:, 15:18] = target_pos + + # compute distance for compute_reward() + self._computed_distance = torch.norm(self.end_effector_pos - target_pos, dim=-1) + + def reset_idx(self, env_ids): + # reset robot + pos = torch.clamp(self.robot_default_dof_pos.unsqueeze(0) + 0.25 * (torch.rand((len(env_ids), self.num_robot_dofs), device=self.device) - 0.5), + self.robot_dof_lower_limits, self.robot_dof_upper_limits) + pos[:, 7:] = 0 + + self.robot_dof_targets[env_ids, :] = pos[:] + self.dof_pos[env_ids, :] = pos[:] + self.dof_vel[env_ids, :] = 0 + + indexes = self.indexes_sim_robot[env_ids] + self.gym.set_dof_position_target_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.robot_dof_targets), + gymtorch.unwrap_tensor(indexes), + len(env_ids)) + + self.gym.set_dof_state_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.dof_state), + gymtorch.unwrap_tensor(indexes), + len(env_ids)) + + # reset targets + pos = (torch.rand((len(env_ids), 3), device=self.device) - 0.5) * 2 + pos[:, 0] = 0.50 + pos[:, 0] * 0.25 + pos[:, 1] = 0.00 + pos[:, 1] * 0.25 + pos[:, 2] = 0.20 + pos[:, 2] * 0.10 + + self.root_pos[env_ids, 1, :] = pos[:] + + indexes = self.indexes_sim_target[env_ids] + self.gym.set_actor_root_state_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.root_state), + gymtorch.unwrap_tensor(indexes), 
+ len(env_ids)) + + # bookkeeping + self.reset_buf[env_ids] = 0 + self.progress_buf[env_ids] = 0 + + def pre_physics_step(self, actions): + actions = actions.clone().to(self.device) + + if self._control_space == "joint": + targets = self.robot_dof_targets[:, :7] + self.robot_dof_speed_scales[:7] * self.dt * actions * self._action_scale + + elif self._control_space == "cartesian": + goal_position = self.end_effector_pos + actions / 100.0 + delta_dof_pos = isaacgym_utils.ik(jacobian_end_effector=self.jacobian_end_effector, + current_position=self.end_effector_pos, + current_orientation=self.end_effector_rot, + goal_position=goal_position, + goal_orientation=None) + targets = self.robot_dof_targets[:, :7] + delta_dof_pos + + self.robot_dof_targets[:, :7] = torch.clamp(targets, self.robot_dof_lower_limits[:7], self.robot_dof_upper_limits[:7]) + self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(self.robot_dof_targets)) + + def post_physics_step(self): + self.progress_buf += 1 + + env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1) + if len(env_ids) > 0: + self.reset_idx(env_ids) + + self.compute_observations() + self.compute_reward() diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py new file mode 100644 index 00000000..cf30f686 --- /dev/null +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py @@ -0,0 +1,89 @@ +import isaacgym + +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define only the policy for evaluation +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + + +# instantiate and configure the task +headless = True # set headless to False for rendering + +from reaching_franka_isaacgym_env import ReachingFrankaTask, TASK_CFG + +TASK_CFG["headless"] = headless +TASK_CFG["env"]["numEnvs"] = 64 +TASK_CFG["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + +env = ReachingFrankaTask(cfg=TASK_CFG) + +# wrap the environment +env = wrap_env(env, "isaacgym-preview4") + +device = env.device + + +# Instantiate the agent's policy. +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +# logging to TensorBoard each 32 timesteps and ignore checkpoints +cfg_ppo["experiment"]["write_interval"] = 32 +cfg_ppo["experiment"]["checkpoint_interval"] = 0 + +agent = PPO(models=models_ppo, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoints +if TASK_CFG["env"]["controlSpace"] == "joint": + agent.load("./agent_joint_isaacgym.pt") +elif TASK_CFG["env"]["controlSpace"] == "cartesian": + agent.load("./agent_cartesian_isaacgym.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 1000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start evaluation +trainer.eval() diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py new file mode 100644 index 00000000..841129aa --- /dev/null +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py @@ -0,0 +1,130 @@ +import isaacgym + +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the models (stochastic and deterministic models) for the agent using helper mixins. 
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, states, taken_actions, role): + return self.net(states), self.log_std_parameter + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, 1)) + + def compute(self, states, taken_actions, role): + return self.net(states) + + +# instantiate and configure the task +headless = True # set headless to False for rendering + +from reaching_franka_isaacgym_env import ReachingFrankaTask, TASK_CFG + +TASK_CFG["headless"] = headless +TASK_CFG["env"]["numEnvs"] = 1024 +TASK_CFG["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + +env = ReachingFrankaTask(cfg=TASK_CFG) + +# wrap the environment +env = wrap_env(env, "isaacgym-preview4") + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
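+# configuration sketch: 16 rollout steps per environment are collected before each update;
+# the batch is then split into 8 mini-batches and iterated over for 8 learning epochs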
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 16 +cfg_ppo["learning_epochs"] = 8 +cfg_ppo["mini_batches"] = 8 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 5e-4 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 2.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 32 and 250 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 5 +cfg_ppo["experiment"]["checkpoint_interval"] = 250 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 5000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() From 010deab77b44b80d5ac22902bc6d51e4c788b6ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 2 Oct 2022 23:57:37 +0200 Subject: [PATCH 104/108] Rename Omniverse Isaac Gym example files for Franka Emika --- ...ranka_sim_env.py => reaching_franka_omniverse_isaacgym_env.py} | 0 ...rl_eval.py => reaching_franka_omniverse_isaacgym_skrl_eval.py} | 0 ..._train.py => reaching_franka_omniverse_isaacgym_skrl_train.py} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename docs/source/examples/real_world/franka_emika_panda/{reaching_franka_sim_env.py => reaching_franka_omniverse_isaacgym_env.py} (100%) rename docs/source/examples/real_world/franka_emika_panda/{reaching_franka_sim_skrl_eval.py => reaching_franka_omniverse_isaacgym_skrl_eval.py} (100%) rename docs/source/examples/real_world/franka_emika_panda/{reaching_franka_sim_skrl_train.py => reaching_franka_omniverse_isaacgym_skrl_train.py} (100%) diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py similarity index 100% rename from docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_env.py rename to docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py similarity index 100% rename from docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_eval.py rename to docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_train.py 
b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py similarity index 100% rename from docs/source/examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_train.py rename to docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py From 71dc0ef3673ffb9096f63dd81a1119688b9ee0c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 3 Oct 2022 11:42:35 +0200 Subject: [PATCH 105/108] Add Franka Emika Isaac Gym example to docs --- docs/source/intro/examples.rst | 70 +++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 9 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 7348ee4d..c15e6c92 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -942,6 +942,10 @@ These examples show basic real-world use cases to guide and support advanced RL **Main environment configuration:** + .. note:: + + In the joint control space the final control of the robot is performed through the Cartesian pose (forward kinematics from specified values for the joints) + The control space (Cartesian or joint), the robot motion type (waypoint or impedance) and the target position acquisition (command prompt / automatically generated or USB-camera) can be specified in the environment class constructor (from :literal:`reaching_franka_real_skrl_eval.py`) as follow: .. code-block:: python @@ -950,7 +954,7 @@ These examples show basic real-world use cases to guide and support advanced RL motion_type = "waypoint" # waypoint or impedance camera_tracking = False # True for USB-camera tracking - .. tab:: Simulation + .. tab:: Simulation (Omniverse Isaac Gym) .. raw:: html @@ -970,9 +974,9 @@ These examples show basic real-world use cases to guide and support advanced RL **Files** (the implementation is self-contained so no specific location is required): - * Environment: :download:`reaching_franka_sim_env.py <../examples/real_world/franka_emika_panda/reaching_franka_sim_env.py>` - * Training script: :download:`reaching_franka_sim_skrl_train.py <../examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_train.py>` - * Evaluation script: :download:`reaching_franka_sim_skrl_eval.py <../examples/real_world/franka_emika_panda/reaching_franka_sim_skrl_eval.py>` + * Environment: :download:`reaching_franka_omniverse_isaacgym_env.py <../examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py>` + * Training script: :download:`reaching_franka_omniverse_isaacgym_skrl_train.py <../examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py>` + * Evaluation script: :download:`reaching_franka_omniverse_isaacgym_skrl_eval.py <../examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py>` * Checkpoints (:literal:`agent_joint.pt`, :literal:`agent_cartesian.pt`): :download:`trained_checkpoints.zip ` **Training and evaluation:** @@ -980,27 +984,75 @@ These examples show basic real-world use cases to guide and support advanced RL .. code-block:: bash # training (local workstation) - ~/.local/share/ov/pkg/isaac_sim-*/python.sh reaching_franka_sim_skrl_train.py + ~/.local/share/ov/pkg/isaac_sim-*/python.sh reaching_franka_omniverse_isaacgym_skrl_train.py # training (docker container) - /isaac-sim/python.sh reaching_franka_sim_skrl_train.py + /isaac-sim/python.sh reaching_franka_omniverse_isaacgym_skrl_train.py .. 
code-block:: bash # evaluation (local workstation) - ~/.local/share/ov/pkg/isaac_sim-*/python.sh reaching_franka_sim_skrl_eval.py + ~/.local/share/ov/pkg/isaac_sim-*/python.sh reaching_franka_omniverse_isaacgym_skrl_eval.py # evaluation (docker container) - /isaac-sim/python.sh reaching_franka_sim_skrl_eval.py + /isaac-sim/python.sh reaching_franka_omniverse_isaacgym_skrl_eval.py **Main environment configuration:** - The control space (Cartesian or joint) can be specified in the task configuration dictionary (from :literal:`reaching_franka_sim_skrl_train.py`) as follow: + The control space (Cartesian or joint) can be specified in the task configuration dictionary (from :literal:`reaching_franka_omniverse_isaacgym_skrl_train.py`) as follow: .. code-block:: python TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + .. tab:: Simulation (Isaac Gym) + + .. raw:: html + + + + .. raw:: html + + + + | + + **Prerequisites:** + + All installation steps described in Isaac Gym's `Installation `_ section must be fulfilled + + **Files** (the implementation is self-contained so no specific location is required): + + * Environment: :download:`reaching_franka_isaacgym_env.py <../examples/real_world/franka_emika_panda/reaching_franka_isaacgym_env.py>` + * Training script: :download:`reaching_franka_isaacgym_skrl_train.py <../examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py>` + * Evaluation script: :download:`reaching_franka_isaacgym_skrl_eval.py <../examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py>` + + **Training and evaluation:** + + .. note:: + + The checkpoints obtained in Isaac Gym were not evaluated with the real robot. However, they were evaluated in Omniverse Isaac Gym showing successful performance + + .. code-block:: bash + + # training (with the Python virtual environment active) + python reaching_franka_isaacgym_skrl_train.py + + .. code-block:: bash + + # evaluation (with the Python virtual environment active) + python reaching_franka_isaacgym_skrl_eval.py + + **Main environment configuration:** + + The control space (Cartesian or joint) can be specified in the task configuration dictionary (from :literal:`reaching_franka_isaacgym_skrl_train.py`) as follow: + + .. code-block:: python + + TASK_CFG["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + .. _library_utilities: Library utilities (skrl.utils module) From 1d6ee4b00e57f4daf28427c85b070b0f95b97dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 3 Oct 2022 11:45:59 +0200 Subject: [PATCH 106/108] Update CHANGELOG --- CHANGELOG.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc1a765f..2b66df84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). -## [0.8.0] - Unreleased +## [0.8.0] - 2022-10-03 ### Added - AMP agent for physics-based character animation - Manual trainer @@ -14,16 +14,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
- Migrate checkpoints/models from other RL libraries to skrl models/agents - Configuration parameter `store_separately` to agent configuration dict - Save/load agent modules (models, optimizers, preprocessors) +- Set random seed and configure deterministic behavior for reproducibility +- Benchmark results for Isaac Gym and Omniverse Isaac Gym on the GitHub discussion page +- Franka Emika real-world example ### Changed - Models implementation as Python mixin [**breaking change**] - Multivariate Gaussian model (`GaussianModel` until 0.7.0) to `MultivariateGaussianMixin` - Trainer's `cfg` parameter position and default values -- Show training/evaluadion display progress using `tqdm` (by @JohannLange) +- Show training/evaluation display progress using `tqdm` (by @JohannLange) +- Update Isaac Gym and Omniverse Isaac Gym examples ### Fixed - Missing recursive arguments during model weights initialization - Tensor dimension when computing preprocessor parallel variance +- Models' clip tensors dtype to `float32` ### Removed - Parameter `inference` from model methods From 996b8eed5a160338be962d7009dd30be736a3513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 3 Oct 2022 11:55:40 +0200 Subject: [PATCH 107/108] Clean the code and add comment for Cartesian-impedance control --- .../reaching_franka_real_env.py | 27 ++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py index d67cfb0b..8afb649d 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py @@ -17,7 +17,11 @@ def __init__(self, robot_ip="172.16.0.2", device="cuda:0", control_space="joint" self.motion_type = motion_type # waypoint or impedance if self.control_space == "cartesian" and self.motion_type == "impedance": - raise ValueError("Unsafe robot operation in cartesian/impedance configuration") + # The operation of this mode (Cartesian-impedance) was adjusted later without being able to test it on the real robot. + # Dangerous movements may occur for the operator and the robot. + # Comment the following line of code if you want to proceed with this mode. + raise ValueError("See comment in the code to proceed with this mode") + pass # camera tracking (disabled by default) self.camera_tracking = camera_tracking @@ -253,24 +257,3 @@ def render(self, *args, **kwargs): def close(self): pass - - - - -if __name__ == "__main__": - - # test camera capturing - ReachingFranka._update_target_from_camera(None) - exit() - - - env = ReachingFranka() - - observation = env.reset() - for _ in range(100): - observation, reward, done, info = env.step(env.action_space.sample()) - env.render() - if done: - observation = env.reset() - - env.close() From 77029f8c8cc33517c4510e087bd78b3c7887ca8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 3 Oct 2022 12:00:16 +0200 Subject: [PATCH 108/108] Update README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f9fa13a8..c4ae3499 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@


SKRL - Reinforcement Learning library


@@ -14,7 +14,7 @@ https://skrl.readthedocs.io/en/latest/
-> **Note:** This project is under **active continuous development**. Please make sure you always have the latest version +> **Note:** This project is under **active continuous development**. Please make sure you always have the latest version. Visit the [develop](https://github.com/Toni-SM/skrl/tree/develop) branch or its [documentation](https://skrl.readthedocs.io/en/develop) to access the latest updates to be released.