# DAG_network.py

# -*- coding: utf-8 -*-
"""projectDAG.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/14GdvtACBZEMNosKCiH5fGVW9HskFfuNM
"""

# Fetch the dataset repository. The notebook used the IPython shell escape
# ("! git clone ..."), which is a syntax error when this file is run as plain
# Python, so the clone is issued through subprocess instead.
import os
import subprocess

# Skip the clone when a previous run already created the "data" checkout.
if not os.path.isdir("data"):
    subprocess.run(
        ["git", "clone", "https://github.com/SofiaDandjee/data"],
        check=True,
    )

# Libraries
import cv2
import numpy as np
import torch
from torchvision import transforms, datasets
from torch.utils import data
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
from torch.optim.lr_scheduler import ReduceLROnPlateau
import os

# Seed every RNG the script draws from. Seeding NumPy alone is not enough:
# the dataset split, the loader shuffling and the weight initialisation all
# draw from PyTorch's generator.
SEED = 40
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Dataset layout: one sub-directory per class under filepath.
filepath = "data/cell_images/"
infpath = filepath + "Parasitized"
uninfpath = filepath + "Uninfected"

# os.listdir returns entries in arbitrary order; sort so that the example
# images displayed later are the same on every run.
pathdir = sorted(os.listdir(filepath))
infdir = sorted(os.listdir(infpath))
uninfdir = sorted(os.listdir(uninfpath))
print(pathdir)

# Use the first CUDA device when one is visible, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Check if a GPU (CUDA) is available
print(torch.cuda.is_available())

#normalisation shared by the training and the evaluation pipelines
#(per-channel mean/std; presumably measured on this dataset - TODO confirm)
normalize = transforms.Normalize(mean=[0.5295, 0.4239, 0.4530],
                                 std=[0.3257, 0.2623, 0.2767])

#define transform to the training data (with data augmentation)
dataset_transform = transforms.Compose(
       [transforms.Resize((100,100)),
        transforms.ColorJitter(hue=0.05, saturation=0.05),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

#define deterministic transform for validation/test: same resize and
#normalisation, but no random augmentation, so the reported metrics do not
#change from one evaluation pass to the next
eval_transform = transforms.Compose(
       [transforms.Resize((100,100)),
        transforms.ToTensor(),
        normalize,
    ])

#import data from the GitHub repo; two views of the same folder that differ
#only in the transform applied when an image is loaded
dataset = datasets.ImageFolder(root='data/cell_images/',transform=dataset_transform)
eval_dataset = datasets.ImageFolder(root='data/cell_images/',transform=eval_transform)

#define training, validation, test set sizes
n = len(dataset)
n_val = int(n*0.15)  #nb of val elements
n_test = int(n*0.15) #nb of test elements
n_train = n-n_val-n_test #nb of training elements

#shuffle the sample indices once with a dedicated seeded generator so the
#split is reproducible, then carve the three disjoint subsets out of it
split_generator = torch.Generator().manual_seed(40)
shuffled_indices = torch.randperm(n, generator=split_generator).tolist()

train_set = data.Subset(dataset, shuffled_indices[:n_train])
val_set = data.Subset(eval_dataset, shuffled_indices[n_train:n_train + n_val])
test_set = data.Subset(eval_dataset, shuffled_indices[n_train + n_val:])

#define training, validation, test loaders (only the training loader shuffles)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True, num_workers=0)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=32, shuffle=False, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32,shuffle=False, num_workers=0)

print(len(train_set), len(val_set), len(test_set))

def imshow(path, dir_, title, n):
    """Show up to ``n`` images from a directory side by side in one figure.

    Args:
        path: Directory that contains the image files.
        dir_: File names inside ``path``; they are tried in the given order.
        title: Title drawn above every displayed image.
        n: Number of subplot columns, i.e. the maximum number of images shown.

    Entries that OpenCV cannot decode (``cv2.imread`` returns ``None`` for
    them) are skipped, and fewer than ``n`` images are shown when ``dir_``
    does not hold enough readable files.
    """
    shown = 0
    for file_name in dir_:
        if shown >= n:
            break
        img = cv2.imread(os.path.join(path, file_name))
        if img is None:
            # Not a readable image; try the next entry.
            continue
        shown += 1
        plt.subplot(1, n, shown)
        # OpenCV loads pixels as BGR while matplotlib expects RGB; convert so
        # the colours are not swapped on screen.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.title(title)
    plt.show()

#show examples of parasitized and uninfected images
imshow(infpath, infdir, 'Parasitized', 5)
imshow(uninfpath, uninfdir, 'Uninfected', 5)

#reproducible cuDNN behaviour: with benchmark enabled cuDNN times several
#convolution algorithms and may pick a different one from run to run, which
#works against the deterministic flag below, so auto-tuning is turned off
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

class Net(nn.Module):
    """Three-stage CNN in which every stage feeds its own linear classifier.

    Each stage applies a 3x3 convolution followed by ReLU. The stage output is
    used twice: a side branch (average pool -> batch norm -> flatten -> linear)
    produces a pair of class scores, and a max-pooled copy is passed on to the
    next stage. The network output is the sum of the three pairs of scores.

    The linear layer sizes are hard-coded for 3x100x100 inputs.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: convolution plus the batch norm used on its side branch.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.bn1 = nn.BatchNorm2d(num_features=32)

        # Stage 2.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.bn2 = nn.BatchNorm2d(num_features=32)

        # Stage 3.
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.bn3 = nn.BatchNorm2d(num_features=32)

        # Shared pooling layers; the dropout layer is created but is not
        # applied anywhere in forward().
        self.mpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.apool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout()

        # One two-class linear head per stage, sized for the average-pooled
        # feature map of that stage (32 channels of 49x49, 23x23 and 10x10).
        self.lin1 = nn.Linear(32 * 49 * 49, 2)
        self.lin2 = nn.Linear(32 * 23 * 23, 2)
        self.lin3 = nn.Linear(32 * 10 * 10, 2)

    def _side_head(self, features, norm, classifier):
        """Average-pool, normalise and flatten ``features``, then classify."""
        pooled = norm(self.apool(features))
        flat = pooled.view(-1, classifier.in_features)
        return classifier(flat)

    def forward(self, x):
        """Return the summed class scores of the three side branches."""
        # Stage 1: 100x100 input -> 98x98 feature maps; its head sees 49x49.
        stage1 = F.relu(self.conv1(x))
        scores1 = self._side_head(stage1, self.bn1, self.lin1)

        # Stage 2: max pool to 49x49, convolve to 47x47; its head sees 23x23.
        stage2 = F.relu(self.conv2(self.mpool(stage1)))
        scores2 = self._side_head(stage2, self.bn2, self.lin2)

        # Stage 3: max pool to 23x23, convolve to 21x21; its head sees 10x10.
        stage3 = F.relu(self.conv3(self.mpool(stage2)))
        scores3 = self._side_head(stage3, self.bn3, self.lin3)

        # Each term has shape (batch, 2).
        return scores1 + scores2 + scores3

#build the network and move it to the selected device (GPU when available)
net = Net().to(device)

#cross entropy loss over the two class scores
criterion = nn.CrossEntropyLoss()

#stochastic gradient descent with Nesterov momentum and weight decay
optimizer = optim.SGD(
    net.parameters(),
    lr=1e-2,
    momentum=0.9,
    nesterov=True,
    weight_decay=1e-4,
)

#scheduler that multiplies the learning rate by 0.1 once the monitored
#value has stopped decreasing (patience of one scheduler step)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=1)

#number of epochs
n_epochs = 30
#number of batches between two progress reports
log_every = 500

for epoch in range(n_epochs):

    #make sure the network is in training mode (matters for batch norm),
    #even when this loop is re-run after the evaluation code called net.eval()
    net.train()

    running_loss = 0.0  #loss summed since the last progress report
    epoch_loss = 0.0    #loss summed over the whole epoch
    n_batches = 0

    #the loop variable is called `batch` rather than `data` so that it does
    #not shadow the torch.utils.data module imported under the name `data`
    for i, batch in enumerate(train_loader, 0):
        # get the batch inputs
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)

        #Backward pass
        loss = criterion(outputs, labels)
        loss.backward()

        #Optimizer update
        optimizer.step()

        #Batch loss
        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        n_batches += 1

        # print statistics every `log_every` batches; exactly `log_every`
        # batch losses have been summed at this point, so the mean is exact
        if (i + 1) % log_every == 0:
            print('[%d, %5d] loss: %.3f' %(epoch + 1, i+1, running_loss/log_every))
            running_loss = 0.0

    #mean training loss over the full epoch (guarded against an empty loader)
    mean_epoch_loss = epoch_loss / max(n_batches, 1)
    print('[%d] mean training loss: %.3f' % (epoch + 1, mean_epoch_loss))

    #Scheduler update: monitor the full-epoch mean rather than whatever
    #partial sum is left in running_loss after its last reset
    scheduler.step(mean_epoch_loss)

print('Finished Training')


#evaluation runs on the CPU with the network in inference mode
net.cpu()
net.eval()

import numpy as np
from sklearn.metrics import roc_auc_score


def evaluate(loader):
    """Return ``(accuracy, auc)`` of ``net`` over all samples of ``loader``.

    Accuracy is the fraction of samples whose highest-scoring class equals the
    label. AUC is computed once over the whole loader from the softmax
    probability of class index 1, not per batch from hard predictions: a small
    batch can contain a single class, for which ``roc_auc_score`` raises, and
    averaging per-batch values gives every batch the same weight regardless of
    its size.

    Both values are NaN for an empty loader; AUC is NaN when the labels
    contain only one class.
    """
    label_chunks = []
    score_chunks = []
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            outputs = net(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            label_chunks.append(labels)
            score_chunks.append(torch.softmax(outputs, dim=1)[:, 1])

    if total == 0:
        return float('nan'), float('nan')

    y_true = torch.cat(label_chunks).numpy()
    y_score = torch.cat(score_chunks).numpy()
    if len(np.unique(y_true)) < 2:
        # ROC AUC is undefined when only one class is present.
        auc = float('nan')
    else:
        auc = roc_auc_score(y_true, y_score)
    return correct / total, auc


#Training, validation and test accuracy and AUC
for split_name, split_loader in (('train', train_loader),
                                 ('validation', val_loader),
                                 ('test', test_loader)):
    accuracy, auc = evaluate(split_loader)
    print('Accuracy of the network on the %s images: %.2f %%' % (
        split_name, 100 * accuracy))
    print('AUC of the network on the %s images: %.2f %%' % (
        split_name, 100 * auc))


#Validation confusion matrix
nb_classes = 2

#rows are indexed by the true class, columns by the predicted class
confusion_matrix = torch.zeros(nb_classes, nb_classes)
with torch.no_grad():
    for inputs, classes in val_loader:
        #predicted class = index of the highest score for each sample
        preds = net(inputs).argmax(dim=1)
        true_ids = classes.view(-1).tolist()
        pred_ids = preds.view(-1).tolist()
        for truth, guess in zip(true_ids, pred_ids):
            confusion_matrix[truth, guess] += 1

print(confusion_matrix)