# model.py

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Use the GPU when PyTorch reports that CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def hidden_init(layer):
    """Return the symmetric uniform bounds used to initialize a hidden layer.

    The limit is ``1 / sqrt(fan_in)``, where ``fan_in`` is the number of
    inputs feeding each unit of the layer.

    :param layer: A layer whose ``weight`` is laid out as
        (out_features, in_features), e.g. ``nn.Linear``
    :returns: Tuple ``(-lim, lim)``
    """
    # nn.Linear stores its weight as (out_features, in_features), so the
    # fan-in is dimension 1; dimension 0 is the number of output units.
    fan_in = layer.weight.data.size()[1]
    lim = 1. / np.sqrt(fan_in)
    return -lim, lim


class ActorNetwork(nn.Module):
    """Actor (Policy) Model.

    A fully connected network with one ReLU hidden layer followed by a tanh
    output layer, so every action component lies in [-1, 1].
    """

    def __init__(self, state_size, action_size, seed=10, fc_units=384):
        """Initialize parameters and build model.

        :param state_size: (int), Dimension of each state
        :param action_size: (int), Dimension of each action
        :param seed: (int), Random seed
        :param fc_units: (int), Number of nodes in the hidden layer
        """
        super().__init__()
        # torch.manual_seed seeds the global RNG and returns the default
        # Generator, which is what gets stored on the instance.
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(state_size, fc_units)
        self.fc2 = nn.Linear(fc_units, action_size)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw the layer weights from their uniform initial ranges.

        The hidden layer uses the bounds returned by ``hidden_init``; the
        output layer uses the fixed range [-3e-3, 3e-3]. Biases are not
        touched here.
        """
        self.fc1.weight.data.uniform_(*hidden_init(self.fc1))
        self.fc2.weight.data.uniform_(-3e-3, 3e-3)

    def forward(self, state):
        """Maps states -> actions.

        :param state: (PyTorch tensor) A batch of states
        :returns: PyTorch tensor containing the action values
        """
        x = F.relu(self.fc1(state))
        return torch.tanh(self.fc2(x))

    def get_action(self, state):
        """Maps a single state -> action, without tracking gradients.

        :param state: (nd-array), A single state
        :returns: Numpy array containing the action values
        """
        # Place the input on the device the weights actually live on, so the
        # call works whether or not the network has been moved to the GPU.
        target = self.fc1.weight.device
        state = torch.as_tensor(state, dtype=torch.float32, device=target).unsqueeze(0)
        # The result leaves as a numpy array, so no autograd graph is needed.
        with torch.no_grad():
            action = self.forward(state)
        return action.squeeze(0).cpu().numpy()


class CriticNetwork(nn.Module):
    """Critic (Value) Model.

    Scores a (state, action) pair with a single Q-value. The state passes
    through the first layer alone; the action is concatenated to that
    layer's output before the remaining layers.
    """

    def __init__(self, state_size, action_size, seed=10, fcs1_units=512, fc2_units=256, fc3_units=128):
        """Build the four linear layers and initialize their weights.

        :param state_size: (int), Dimension of each state
        :param action_size: (int), Dimension of each action
        :param seed: (int), Random seed
        :param fcs1_units: (int), Width of the first (state-only) hidden layer
        :param fc2_units: (int), Width of the second hidden layer
        :param fc3_units: (int), Width of the third hidden layer
        """
        super().__init__()
        self.seed = torch.manual_seed(seed)
        self.fcs1 = nn.Linear(state_size, fcs1_units)
        # The second layer also receives the raw action vector.
        self.fc2 = nn.Linear(fcs1_units + action_size, fc2_units)
        self.fc3 = nn.Linear(fc2_units, fc3_units)
        self.fc4 = nn.Linear(fc3_units, 1)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw all layer weights from their uniform initial ranges.

        Hidden layers use the bounds from ``hidden_init``; the output layer
        uses the fixed range [-3e-3, 3e-3]. Biases are not touched here.
        """
        for hidden in (self.fcs1, self.fc2, self.fc3):
            low, high = hidden_init(hidden)
            hidden.weight.data.uniform_(low, high)
        self.fc4.weight.data.uniform_(-3e-3, 3e-3)

    def forward(self, state, action):
        """Map (state, action) pairs -> Q-values.

        :param state: (PyTorch tensor) A batch of states
        :param action: (PyTorch tensor) A batch of actions, joined to the
            state features along dimension 1
        :returns: PyTorch tensor produced by the final single-unit layer
        """
        state_features = F.leaky_relu(self.fcs1(state))
        joined = torch.cat([state_features, action], dim=1)
        hidden = F.leaky_relu(self.fc2(joined))
        hidden = F.leaky_relu(self.fc3(hidden))
        return self.fc4(hidden)