model_28.py

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from spectral import SpectralNorm
import numpy as np

class Self_Attn(nn.Module):
    """ Self attention Layer"""
    def __init__(self, in_dim):
        super().__init__()
        
        # Construct the module
        self.query_conv = nn.Conv2d(in_channels = in_dim , out_channels = in_dim//2 , kernel_size= 1)
        self.key_conv = nn.Conv2d(in_channels = in_dim , out_channels = in_dim//2 , kernel_size= 1)
        self.value_conv = nn.Conv2d(in_channels = in_dim , out_channels = in_dim , kernel_size= 1)
        
        self.gamma = nn.Parameter(torch.zeros(1))
        self.softmax  = nn.Softmax(dim=-1)
        
    def forward(self,x):
        """
            inputs :
                x : input feature maps( B * C * W * H)
            returns :
                out : self attention value + input feature 
                attention: B * N * N (N is Width*Height)
        """
        m_batchsize,C,width ,height = x.size()
        
        proj_query  = self.query_conv(x).view(m_batchsize, -1, width*height).permute(0,2,1) # B * N * C
        proj_key =  self.key_conv(x).view(m_batchsize, -1, width*height) # B * C * N
        energy =  torch.bmm(proj_query, proj_key) # batch matrix-matrix product
        
        attention = self.softmax(energy) # B * N * N
        proj_value = self.value_conv(x).view(m_batchsize, -1, width*height) # B * C * N
        out = torch.bmm(proj_value, attention.permute(0,2,1)) # batch matrix-matrix product
        out = out.view(m_batchsize,C,width,height) # B * C * W * H
        
        out = self.gamma*out + x
        return out, attention

class Generator(nn.Module):
    """
    Generator
    input: 
        z: latent matrix with shape of (batch_size, 100)
    output: 
        out: generated image with shape (batch_size, 1, 28, 28)
        p1: attention matrix generated by attn layer
    """
    def __init__(self, batch_size=64, attn=True, image_size=28, z_dim=100, conv_dim=64):
        super().__init__()
        self.attn = attn
        
        # Layer 1 turn 100 dims -> 512 dims, size 1 -> 3
        layer1 = []
        layer1.append(SpectralNorm(nn.ConvTranspose2d(in_channels = z_dim, out_channels = conv_dim*8, kernel_size = 3)))
        layer1.append(nn.BatchNorm2d(conv_dim*8))
        layer1.append(nn.ReLU())
        self.l1 = nn.Sequential(*layer1)
        
        # Layer 2 turn 512 dims -> 256 dims, size 3 -> 7
        layer2 = []
        layer2.append(SpectralNorm(nn.ConvTranspose2d(in_channels = conv_dim*8, out_channels = conv_dim*4, 
                                                      kernel_size = 3, stride = 2, padding = 0)))
        layer2.append(nn.BatchNorm2d(conv_dim*4))
        layer2.append(nn.ReLU())
        self.l2 = nn.Sequential(*layer2)
        
        # Layer 3 turn 256 dims -> 128 dims, size 7 -> 14
        layer3 = []
        layer3.append(SpectralNorm(nn.ConvTranspose2d(in_channels = conv_dim*4, out_channels = conv_dim*2, 
                                                      kernel_size = 4, stride = 2, padding = 1)))
        layer3.append(nn.BatchNorm2d(conv_dim*2))
        layer3.append(nn.ReLU())
        self.l3 = nn.Sequential(*layer3)

        # Layer 4 (Attn) turn 128 dims -> 128 dims
        self.attn = Self_Attn(conv_dim*2)
        
        # Layer 5 turn 128 dims -> 1 dims, size 14 -> 28
        last = []
        last.append(nn.ConvTranspose2d(conv_dim*2, 1, 4, 2, 1))
        last.append(nn.Tanh())
        self.last = nn.Sequential(*last)

    def forward(self, z):
        # z is the input random matrix for generator
        z = z.view(z.size(0), z.size(1), 1, 1)
        out=self.l1(z)
        out=self.l2(out)
        out=self.l3(out)
        if self.attn == True:
            out = self.attn(out)
        out=self.last(out)

        return out


class Discriminator(nn.Module):
    """
    Discriminator
    input:
        x: one batch of data with shape of (batch_size, 1, 28, 28)
    output: 
        out.squeeze: a batch of scalars indicating the predict results
        p1: attention matrix generated by attn layer
    """
    def __init__(self, batch_size=64, attn=True, image_size=28, conv_dim=64):
        super().__init__()
        self.attn = attn
        
        layer1 = []
        layer1.append(SpectralNorm(nn.Conv2d(1, conv_dim, 4, 2, 1)))
        layer1.append(nn.LeakyReLU(0.1))
        curr_dim = conv_dim
        self.l1 = nn.Sequential(*layer1)
        
        layer2 = []
        layer2.append(SpectralNorm(nn.Conv2d(curr_dim, curr_dim * 2, 4, 2, 1)))
        layer2.append(nn.LeakyReLU(0.1))
        curr_dim = curr_dim * 2
        self.l2 = nn.Sequential(*layer2)
        
        layer3 = []
        layer3.append(SpectralNorm(nn.Conv2d(curr_dim, curr_dim * 2, 4, 2, 1)))
        layer3.append(nn.LeakyReLU(0.1))
        curr_dim = curr_dim * 2
        self.l3 = nn.Sequential(*layer3)
        
        self.attn = Self_Attn(curr_dim)
        
        last = []
        last.append(nn.Conv2d(curr_dim, 1, 4, 2, 1))
        self.last = nn.Sequential(*last)

    def forward(self, x):
        out = self.l1(x)
        out = self.l2(out)
        out = self.l3(out)
        if self.attn == True:
            out = self.attn(out)
        out=self.last(out)

        return out.squeeze()