Skip to content

Commit

Permalink
Add new deterministic model implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
Toni-SM committed Sep 2, 2024
1 parent e20a91e commit e6eca36
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 80 deletions.
79 changes: 2 additions & 77 deletions skrl/utils/model_instantiators/torch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@

from skrl.models.torch import Model # noqa
from skrl.models.torch import CategoricalMixin, DeterministicMixin, GaussianMixin, MultivariateGaussianMixin # noqa
from skrl.utils.model_instantiators.torch.deterministic import deterministic_model
from skrl.utils.model_instantiators.torch.gaussian import gaussian_model
from skrl.utils.model_instantiators.torch.multivariate_gaussian import multivariate_gaussian_model


__all__ = ["categorical_model", "deterministic_model", "Shape"]
__all__ = ["categorical_model", "Shape"]


class Shape(Enum):
Expand Down Expand Up @@ -149,82 +150,6 @@ def _generate_sequential(model: Model,
modules.append(_get_activation_function(hidden_activation[i], as_string=True))
return f'nn.Sequential({", ".join(modules)})'

def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
                        action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
                        device: Optional[Union[str, torch.device]] = None,
                        clip_actions: bool = False,
                        input_shape: Shape = Shape.STATES,
                        hiddens: list = [256, 256],
                        hidden_activation: list = ["relu", "relu"],
                        output_shape: Shape = Shape.ACTIONS,
                        output_activation: Optional[str] = "tanh",
                        output_scale: float = 1.0,
                        return_source: bool = False) -> Union[Model, str]:
    """Instantiate a deterministic model

    The model class is generated as source code (a ``DeterministicModel`` class deriving from
    ``DeterministicMixin`` and ``Model``) and then either returned as a string or executed and instantiated.

    :param observation_space: Observation/state space or shape (default: None).
                              If it is not None, the num_observations property will contain the size of that space
    :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
    :param action_space: Action space or shape (default: None).
                         If it is not None, the num_actions property will contain the size of that space
    :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
    :param device: Device on which a tensor/array is or will be allocated (default: ``None``).
                   If None, the device will be either ``"cuda"`` if available or ``"cpu"``
    :type device: str or torch.device, optional
    :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: False)
    :type clip_actions: bool, optional
    :param input_shape: Shape of the input (default: Shape.STATES)
    :type input_shape: Shape, optional
    :param hiddens: Number of hidden units in each hidden layer
    :type hiddens: int or list of ints
    :param hidden_activation: Activation function for each hidden layer (default: "relu").
    :type hidden_activation: list of strings
    :param output_shape: Shape of the output (default: Shape.ACTIONS)
    :type output_shape: Shape, optional
    :param output_activation: Activation function for the output layer (default: "tanh")
    :type output_activation: str or None, optional
    :param output_scale: Scale of the output layer (default: 1.0).
                         If None (or 1), the output layer will not be scaled
    :type output_scale: float, optional
    :param return_source: Whether to return the source string containing the model class used to
                          instantiate the model rather than the model instance (default: False).
    :type return_source: bool, optional

    :raises ValueError: If ``input_shape`` is not one of the supported input shapes

    :return: Deterministic model instance or definition source
    :rtype: Model
    """
    # network (source string of the nn.Sequential container)
    net = _generate_sequential(None, input_shape, hiddens, hidden_activation, output_shape, output_activation)

    # compute: select the expression that feeds the network according to the input shape
    if input_shape == Shape.OBSERVATIONS:
        forward = 'self.net(inputs["states"])'
    elif input_shape == Shape.ACTIONS:
        forward = 'self.net(inputs["taken_actions"])'
    elif input_shape == Shape.STATES_ACTIONS:
        forward = 'self.net(torch.cat((inputs["states"], inputs["taken_actions"]), dim=1))'
    else:
        # without this branch an unhandled shape surfaced later as an UnboundLocalError on `forward`
        raise ValueError(f"Unsupported input shape for deterministic model: {input_shape}")

    # the docstring allows None to disable scaling: do not emit `None * self.net(...)` in that case
    if output_scale is not None and output_scale != 1:
        forward = f"{output_scale} * {forward}"

    template = f"""class DeterministicModel(DeterministicMixin, Model):
    def __init__(self, observation_space, action_space, device, clip_actions):
        Model.__init__(self, observation_space, action_space, device)
        DeterministicMixin.__init__(self, clip_actions)
        self.net = {net}
    def compute(self, inputs, role=""):
        return {forward}, {{}}
    """
    # return source
    if return_source:
        return template

    # instantiate model: the generated class resolves Model, DeterministicMixin, nn and torch
    # from this module's globals, and is collected from the local namespace of the exec call
    _locals = {}
    exec(template, globals(), _locals)
    return _locals["DeterministicModel"](observation_space=observation_space,
                                         action_space=action_space,
                                         device=device,
                                         clip_actions=clip_actions)

def categorical_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
device: Optional[Union[str, torch.device]] = None,
Expand Down
88 changes: 88 additions & 0 deletions skrl/utils/model_instantiators/torch/deterministic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from typing import Any, Mapping, Optional, Sequence, Tuple, Union

import textwrap
import gym
import gymnasium

import torch
import torch.nn as nn # noqa

from skrl.models.torch import DeterministicMixin # noqa
from skrl.models.torch import Model
from skrl.utils.model_instantiators.torch.common import generate_containers


def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
                        action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
                        device: Optional[Union[str, torch.device]] = None,
                        clip_actions: bool = False,
                        network: Sequence[Mapping[str, Any]] = [],
                        output: Union[str, Sequence[str]] = "",
                        return_source: bool = False) -> Union[Model, str]:
    """Instantiate a deterministic model

    The model class is generated as source code (a ``DeterministicModel`` class deriving from
    ``DeterministicMixin`` and ``Model``): one ``self.<name>_container`` attribute per network container,
    and a ``compute`` method that evaluates the containers in order and returns ``output, {}``.
    The source is either returned as a string or executed and instantiated.

    :param observation_space: Observation/state space or shape (default: None).
                              If it is not None, the num_observations property will contain the size of that space
    :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
    :param action_space: Action space or shape (default: None).
                         If it is not None, the num_actions property will contain the size of that space
    :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional
    :param device: Device on which a tensor/array is or will be allocated (default: ``None``).
                   If None, the device will be either ``"cuda"`` if available or ``"cpu"``
    :type device: str or torch.device, optional
    :param clip_actions: Flag to indicate whether the actions should be clipped (default: False)
    :type clip_actions: bool, optional
    :param network: Network definition (default: []). It is forwarded as-is to ``generate_containers``
    :type network: list of dict, optional
    :param output: Output expression (default: ""). It is forwarded as-is to ``generate_containers``
    :type output: list or str, optional
    :param return_source: Whether to return the source string containing the model class used to
                          instantiate the model rather than the model instance (default: False).
    :type return_source: bool, optional

    :raises ValueError: If the model output must be taken from a network container
                        but the network definition produced no containers

    :return: Deterministic model instance or definition source
    :rtype: Model
    """
    # NOTE(review): the mutable default for `network` is kept for interface compatibility;
    # it is not mutated in this function (generate_containers is assumed not to mutate it - confirm)
    containers, output = generate_containers(network, output, embed_output=True, indent=1)

    # network definitions
    networks = []
    forward = []
    last_name = None  # name of the last container, explicit instead of relying on the leaked loop variable
    for container in containers:
        last_name = container["name"]
        networks.append(f'self.{last_name}_container = {container["sequential"]}')
        forward.append(f'{last_name} = self.{last_name}_container({container["input"]})')

    # process output
    if output["modules"]:
        if last_name is None:
            raise ValueError("An output layer was requested but the network definition has no containers")
        networks.append(f'self.output_layer = {output["modules"][0]}')
        forward.append(f'output = self.output_layer({last_name})')
    elif output["output"]:
        forward.append(f'output = {output["output"]}')
    else:
        if last_name is None:
            raise ValueError("The model output cannot be defined: "
                             "the network definition has no containers and no output expression was given")
        # no explicit output: the result of the last container becomes the model output
        forward[-1] = forward[-1].replace(last_name, "output", 1)

    # build substitutions and indent content
    # (the first line's indentation is provided by the placeholder position in the template, so it is stripped)
    networks = textwrap.indent("\n".join(networks), prefix=" " * 8)[8:]
    forward = textwrap.indent("\n".join(forward), prefix=" " * 8)[8:]

    template = f"""class DeterministicModel(DeterministicMixin, Model):
    def __init__(self, observation_space, action_space, device, clip_actions):
        Model.__init__(self, observation_space, action_space, device)
        DeterministicMixin.__init__(self, clip_actions)
        {networks}
    def compute(self, inputs, role=""):
        {forward}
        return output, {{}}
    """
    # return source
    if return_source:
        return template

    # instantiate model: the generated class resolves Model, DeterministicMixin, nn and torch
    # from this module's globals, and is collected from the local namespace of the exec call
    _locals = {}
    exec(template, globals(), _locals)
    return _locals["DeterministicModel"](observation_space=observation_space,
                                         action_space=action_space,
                                         device=device,
                                         clip_actions=clip_actions)
45 changes: 42 additions & 3 deletions tests/torch/test_torch_model_instantiators_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import torch

from skrl.utils.model_instantiators.torch import gaussian_model, multivariate_gaussian_model
from skrl.utils.model_instantiators.torch import deterministic_model, gaussian_model, multivariate_gaussian_model
from skrl.utils.model_instantiators.torch.common import Shape, _generate_modules, _get_activation_function, _parse_input


Expand Down Expand Up @@ -171,7 +171,6 @@ def test_gaussian_model(capsys):
device=device,
return_source=False,
**content)

model.to(device=device)
with capsys.disabled():
print(model)
Expand Down Expand Up @@ -216,11 +215,51 @@ def test_multivariate_gaussian_model(capsys):
device=device,
return_source=False,
**content)

model.to(device=device)
with capsys.disabled():
print(model)

observations = torch.ones((10, model.num_observations), device=device)
output = model.act({"states": observations})
assert output[0].shape == (10, 2)

def test_deterministic_model(capsys):
    """Check that a deterministic model can be generated (as source and as instance) from a YAML definition."""
    device = "cpu"
    observation_space = gym.spaces.Box(np.array([-1] * 5), np.array([1] * 5))
    action_space = gym.spaces.Box(np.array([-1] * 3), np.array([1] * 3))

    # the top-level keys are unpacked as keyword arguments of deterministic_model
    content = r"""
clip_actions: True
network:
  - name: net
    input: Shape.OBSERVATIONS
    layers:
      - linear: 32
      - linear: [32]
      - linear: {out_features: 32}
      - linear: {out_features: ACTIONS}
    activations: elu
output: net / 10
"""
    content = yaml.safe_load(content)

    # source
    source = deterministic_model(observation_space=observation_space,
                                 action_space=action_space,
                                 device=device,
                                 return_source=True,
                                 **content)
    # the source branch must return the generated class definition, not a model instance
    assert isinstance(source, str)
    assert "class DeterministicModel" in source
    with capsys.disabled():
        print(source)

    # instance
    model = deterministic_model(observation_space=observation_space,
                                action_space=action_space,
                                device=device,
                                return_source=False,
                                **content)
    model.to(device=device)
    with capsys.disabled():
        print(model)

    # a batch of 10 observations must produce a batch of 10 actions of the action-space size (3)
    observations = torch.ones((10, model.num_observations), device=device)
    output = model.act({"states": observations})
    assert output[0].shape == (10, 3)

0 comments on commit e6eca36

Please sign in to comment.