Add new deterministic model implementation

Toni-SM · Sep 2, 2024 · e6eca36 · e6eca36
1 parent e20a91e
commit e6eca36
Show file tree

Hide file tree

Showing 3 changed files with 132 additions and 80 deletions.
diff --git a/skrl/utils/model_instantiators/torch/__init__.py b/skrl/utils/model_instantiators/torch/__init__.py
@@ -9,11 +9,12 @@
 
 from skrl.models.torch import Model  # noqa
 from skrl.models.torch import CategoricalMixin, DeterministicMixin, GaussianMixin, MultivariateGaussianMixin  # noqa
+from skrl.utils.model_instantiators.torch.deterministic import deterministic_model
 from skrl.utils.model_instantiators.torch.gaussian import gaussian_model
 from skrl.utils.model_instantiators.torch.multivariate_gaussian import multivariate_gaussian_model
 
 
-__all__ = ["categorical_model", "deterministic_model", "Shape"]
+__all__ = ["categorical_model", "Shape"]
 
 
 class Shape(Enum):
@@ -149,82 +150,6 @@ def _generate_sequential(model: Model,
                 modules.append(_get_activation_function(hidden_activation[i], as_string=True))
     return f'nn.Sequential({", ".join(modules)})'
 
-def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
-                        action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
-                        device: Optional[Union[str, torch.device]] = None,
-                        clip_actions: bool = False,
-                        input_shape: Shape = Shape.STATES,
-                        hiddens: list = [256, 256],
-                        hidden_activation: list = ["relu", "relu"],
-                        output_shape: Shape = Shape.ACTIONS,
-                        output_activation: Optional[str] = "tanh",
-                        output_scale: float = 1.0,
-                        return_source: bool = False) -> Union[Model, str]:
-    """Instantiate a deterministic model
-
-    :param observation_space: Observation/state space or shape (default: None).
-                              If it is not None, the num_observations property will contain the size of that space
-    :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
-    :param action_space: Action space or shape (default: None).
-                         If it is not None, the num_actions property will contain the size of that space
-    :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
-    :param device: Device on which a tensor/array is or will be allocated (default: ``None``).
-                   If None, the device will be either ``"cuda"`` if available or ``"cpu"``
-    :type device: str or torch.device, optional
-    :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: False)
-    :type clip_actions: bool, optional
-    :param input_shape: Shape of the input (default: Shape.STATES)
-    :type input_shape: Shape, optional
-    :param hiddens: Number of hidden units in each hidden layer
-    :type hiddens: int or list of ints
-    :param hidden_activation: Activation function for each hidden layer (default: "relu").
-    :type hidden_activation: list of strings
-    :param output_shape: Shape of the output (default: Shape.ACTIONS)
-    :type output_shape: Shape, optional
-    :param output_activation: Activation function for the output layer (default: "tanh")
-    :type output_activation: str or None, optional
-    :param output_scale: Scale of the output layer (default: 1.0).
-                         If None, the output layer will not be scaled
-    :type output_scale: float, optional
-    :param return_source: Whether to return the source string containing the model class used to
-                          instantiate the model rather than the model instance (default: False).
-    :type return_source: bool, optional
-
-    :return: Deterministic model instance or definition source
-    :rtype: Model
-    """
-    # network
-    net = _generate_sequential(None, input_shape, hiddens, hidden_activation, output_shape, output_activation)
-
-    # compute
-    if input_shape == Shape.OBSERVATIONS:
-        forward = 'self.net(inputs["states"])'
-    elif input_shape == Shape.ACTIONS:
-        forward = 'self.net(inputs["taken_actions"])'
-    elif input_shape == Shape.STATES_ACTIONS:
-        forward = 'self.net(torch.cat((inputs["states"], inputs["taken_actions"]), dim=1))'
-    if output_scale != 1:
-        forward = f"{output_scale} * {forward}"
-
-    template = f"""class DeterministicModel(DeterministicMixin, Model):
-    def __init__(self, observation_space, action_space, device, clip_actions):
-        Model.__init__(self, observation_space, action_space, device)
-        DeterministicMixin.__init__(self, clip_actions)
-
-        self.net = {net}
-
-    def compute(self, inputs, role=""):
-        return {forward}, {{}}
-    """
-    if return_source:
-        return template
-    _locals = {}
-    exec(template, globals(), _locals)
-    return _locals["DeterministicModel"](observation_space=observation_space,
-                                         action_space=action_space,
-                                         device=device,
-                                         clip_actions=clip_actions)
-
 def categorical_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
                       action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
                       device: Optional[Union[str, torch.device]] = None,

diff --git a/skrl/utils/model_instantiators/torch/deterministic.py b/skrl/utils/model_instantiators/torch/deterministic.py
@@ -0,0 +1,88 @@
+from typing import Any, Mapping, Optional, Sequence, Tuple, Union
+
+import textwrap
+import gym
+import gymnasium
+
+import torch
+import torch.nn as nn  # noqa
+
+from skrl.models.torch import DeterministicMixin  # noqa
+from skrl.models.torch import Model
+from skrl.utils.model_instantiators.torch.common import generate_containers
+
+
+def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
+                        action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
+                        device: Optional[Union[str, torch.device]] = None,
+                        clip_actions: bool = False,
+                        network: Sequence[Mapping[str, Any]] = [],
+                        output: Union[str, Sequence[str]] = "",
+                        return_source: bool = False) -> Union[Model, str]:
+    """Instantiate a deterministic model
+
+    :param observation_space: Observation/state space or shape (default: None).
+                              If it is not None, the num_observations property will contain the size of that space
+    :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
+    :param action_space: Action space or shape (default: None).
+                         If it is not None, the num_actions property will contain the size of that space
+    :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
+    :param device: Device on which a tensor/array is or will be allocated (default: ``None``).
+                   If None, the device will be either ``"cuda"`` if available or ``"cpu"``
+    :type device: str or torch.device, optional
+    :param clip_actions: Flag to indicate whether the actions should be clipped (default: False)
+    :type clip_actions: bool, optional
+    :param network: Network definition (default: [])
+    :type network: list of dict, optional
+    :param output: Output expression (default: "")
+    :type output: list or str, optional
+    :param return_source: Whether to return the source string containing the model class used to
+                          instantiate the model rather than the model instance (default: False).
+    :type return_source: bool, optional
+
+    :return: Deterministic model instance or definition source
+    :rtype: Model
+    """
+    containers, output = generate_containers(network, output, embed_output=True, indent=1)
+
+    # network definitions
+    networks = []
+    forward: list[str] = []
+    for container in containers:
+        networks.append(f'self.{container["name"]}_container = {container["sequential"]}')
+        forward.append(f'{container["name"]} = self.{container["name"]}_container({container["input"]})')
+    # process output
+    if output["modules"]:
+        networks.append(f'self.output_layer = {output["modules"][0]}')
+        forward.append(f'output = self.output_layer({container["name"]})')
+    elif output["output"]:
+        forward.append(f'output = {output["output"]}')
+    else:
+        forward[-1] = forward[-1].replace(container["name"], "output", 1)
+
+    # build substitutions and indent content
+    networks = textwrap.indent("\n".join(networks), prefix=" " * 8)[8:]
+    forward = textwrap.indent("\n".join(forward), prefix=" " * 8)[8:]
+
+    template = f"""class DeterministicModel(DeterministicMixin, Model):
+    def __init__(self, observation_space, action_space, device, clip_actions):
+        Model.__init__(self, observation_space, action_space, device)
+        DeterministicMixin.__init__(self, clip_actions)
+
+        {networks}
+
+    def compute(self, inputs, role=""):
+        {forward}
+        return output, {{}}
+    """
+    # return source
+    if return_source:
+        return template
+
+    # instantiate model
+    _locals = {}
+    exec(template, globals(), _locals)
+    return _locals["DeterministicModel"](observation_space=observation_space,
+                                         action_space=action_space,
+                                         device=device,
+                                         clip_actions=clip_actions)
diff --git a/tests/torch/test_torch_model_instantiators_definition.py b/tests/torch/test_torch_model_instantiators_definition.py
@@ -8,7 +8,7 @@
 import numpy as np
 import torch
 
-from skrl.utils.model_instantiators.torch import gaussian_model, multivariate_gaussian_model
+from skrl.utils.model_instantiators.torch import deterministic_model, gaussian_model, multivariate_gaussian_model
 from skrl.utils.model_instantiators.torch.common import Shape, _generate_modules, _get_activation_function, _parse_input
 
 
@@ -171,7 +171,6 @@ def test_gaussian_model(capsys):
                            device=device,
                            return_source=False,
                            **content)
-
     model.to(device=device)
     with capsys.disabled():
         print(model)
@@ -216,11 +215,51 @@ def test_multivariate_gaussian_model(capsys):
                                         device=device,
                                         return_source=False,
                                         **content)
-
     model.to(device=device)
     with capsys.disabled():
         print(model)
 
     observations = torch.ones((10, model.num_observations), device=device)
     output = model.act({"states": observations})
     assert output[0].shape == (10, 2)
+
+def test_deterministic_model(capsys):
+    device = "cpu"
+    observation_space = gym.spaces.Box(np.array([-1] * 5), np.array([1] * 5))
+    action_space = gym.spaces.Box(np.array([-1] * 3), np.array([1] * 3))
+
+    content = r"""
+    clip_actions: True
+    network:
+      - name: net
+        input: Shape.OBSERVATIONS
+        layers:
+          - linear: 32
+          - linear: [32]
+          - linear: {out_features: 32}
+          - linear: {out_features: ACTIONS}
+        activations: elu
+    output: net / 10
+    """
+    content = yaml.safe_load(content)
+    # source
+    model = deterministic_model(observation_space=observation_space,
+                                action_space=action_space,
+                                device=device,
+                                return_source=True,
+                                **content)
+    with capsys.disabled():
+        print(model)
+    # instance
+    model = deterministic_model(observation_space=observation_space,
+                                action_space=action_space,
+                                device=device,
+                                return_source=False,
+                                **content)
+    model.to(device=device)
+    with capsys.disabled():
+        print(model)
+
+    observations = torch.ones((10, model.num_observations), device=device)
+    output = model.act({"states": observations})
+    assert output[0].shape == (10, 3)