diff --git a/recognition/Miller/README.MD b/recognition/Miller/README.MD
new file mode 100644
index 0000000000..5b59bad248
--- /dev/null
+++ b/recognition/Miller/README.MD
@@ -0,0 +1,53 @@
+# Vector Quantized Variational Auto-encoder (VQ-VAE Model)
+
+In this report, a generative model, the Vector Quantized Variational AutoEncoder (VQ-VAE), was used to generate reconstructed images of the OASIS brain data set that are "reasonably clear" and have a Structural Similarity (SSIM) of over 0.6. The VQ-VAE was implemented using TensorFlow Keras.
+
+#### Description of the VQ-VAE Algorithm
+![](https://miro.medium.com/max/1400/1*yRdNe3xi4f3KV6ULW7yArA.png)
+>Figure 1: Graphical representation of a VQ-VAE network.
+
+A standard VAE (encoder->decoder) uses a continuous latent space that is sampled from a Gaussian distribution, which makes the latent distribution hard to learn with gradient descent. In comparison, the VQ-VAE uses a discrete latent space and consists of three parts, as seen above:
+
+1. Encoder:
+    * Convolutional network that downsamples the features of an image
+2. Latent Space:
+    * Codebook consisting of n latent embedding vectors, each of dimension D
+    * Each encoder output is compared against every embedding (Euclidean distance) -> outputs the closest embedding vector
+    * The closest codebook vector for each encoder output is passed to the decoder as input
+3. Decoder:
+    * Convolutional network to upsample and generate reconstructed samples.
+
+#### ==============Oasis Brain Data Set==============
+![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRl7czOsj3uzWRQ6NT2ofed7QBsKiqrUq6Bsw&usqp=CAU)
+>Figure 2: Comparison of an image stored in the train vs test data sets
+
+The OASIS MRI dataset contains 9664 training images, 544 test images and 1120 validation images. An example of train and test data is shown above. The images are preloaded into a file location and extracted from there for processing.
+
+##### Data Pre-Processing
+
+Before the data was used, it was normalised through residual extraction and rescaling. This makes it easier to compare distributions with different means and scales while maintaining the shape of each distribution.
+
+## ==============Training==============
+
+The three data groups - train, validate, and test - are split roughly 0.85/0.10/0.05. The training set contains the most images so the model has enough information to learn from to produce accurate reconstructions later. The test set is used to validate these reconstructions. The validation set is not required, as the model is judged by the quality of the reconstructions on the test set. The model is trained for ... epochs with a batch size of 128.
+*insert image
+
+## ==============Results==============
+
+The reconstructed images achieved a mean Structural Similarity of ...
+*Insert image
+## Dependencies
+* Python 3.7
+* TensorFlow 2.6.0
+* Numpy 1.19.5
+* matplotlib 3.2.2
+* Pillow 7.1.2
+* os
+* Pre-processed OASIS MRI dataset (accessible at https://cloudstor.aarnet.edu.au/plus/s/n5aZ4XX1WBKp6HZ/download).
+
+## References
+[1] A. v. d. Oord, O. Vinyals, and K. Kavukcuoglu, 2018. Neural Discrete Representation Learning. [Online]. Available at: https://arxiv.org/pdf/1711.00937.pdf.
+
+[2] Paul, S., 2021. Keras documentation: Vector-Quantized Variational Autoencoders. [Online] Keras.io. Available at: https://keras.io/examples/generative/vq_vae/.
+ +[3] https://github.com/shakes76/PatternFlow/tree/master/recognition/MySolution diff --git a/recognition/Miller/dataset.py b/recognition/Miller/dataset.py new file mode 100644 index 0000000000..ecb9d05c38 --- /dev/null +++ b/recognition/Miller/dataset.py @@ -0,0 +1,113 @@ +""" +dataset.py" containing the data loader for loading and preprocessing your data + +This was file utilises and modifies the fucntions found in https://github.com/shakes76/PatternFlow/tree/master/recognition/MySolution +""" + +import tensorflow as tf +import glob +import numpy as np +from matplotlib import image +import os +from PIL import Image + + +# Download the Oasis Data as zip file. Will need to extract it manually afterwards +def download_oasis (): + + dataset_url = "https://cloudstor.aarnet.edu.au/plus/s/n5aZ4XX1WBKp6HZ/download" + + # Download file from URL Path, origin=path, fname=file name, untar=compress file + tf.keras.utils.get_file(origin=dataset_url,fname='oa-sis' ,untar=True) + +# Loads the training images (non segmented) from given path and returns an numpy array of arrays +def load_training (path): + + image_list = [] + # Iterate through all paths and convert to 'png' + for filename in glob.glob(path + '/*.png'): + # Read an image from the given filename into an array + im = image.imread (filename) + # Append array to list + image_list.append(im) + + print('train_X shape:', np.array(image_list).shape) + + # Create an numpy array to hold all the array turned images + train_set = np.array(image_list, dtype=np.float32) + + + return train_set + +# Normalizes training images and adds 4th dimention +def process_training (data_set): + + """ Residual Extraction -> Useful for comparing distributions with different means but similar shapes""" + # Calculate the residuals of the data - each residual is dist from each distribution mean which is now zero + data_set = (data_set - np.mean(data_set)) / np.std(data_set) + """ Min-Max Rescaling -> Useful for comparign distributions with different scales or different shapes""" + # Rescale Data - ratio of dist of each value from min value in each dataset to range of values in each dataset -> value between (0,1) now + # Forces dataset to be same scale, and perseves shape of distribution -> "Squeezed and shifted to fit between 0 and 1" + data_set= (data_set - np.amin(data_set)) / np.amax(data_set - np.amin(data_set)) + # Add 4th dimension + data_set = data_set [:,:,:,np.newaxis] + + return data_set + +# Loads labels images from given path and map pixel values to class indices and convert image data type to unit8 +def load_labels (path): + image_list =[] + + # Iterate through all paths and convert to 'png' + for filename in glob.glob(path+'/*.png'): + # Read an image from the given filename into an array + im=image.imread (filename) + # Create 'im.shape[0] x im.shape[1]' shaped array of arrays of zeros + one_hot = np.zeros((im.shape[0], im.shape[1])) + # Iterate through sorted and unique arrays of given array turned image + for i, unique_value in enumerate(np.unique(im)): + # One hot each unique array with its numerical value of its entry in the dataset -> transform categorical into numerical dummy features + one_hot[:, :][im == unique_value] = i + # Append array to list + image_list.append(one_hot) + + print('train_y shape:',np.array(image_list).shape) + + # Create an numpy array to hold all the array turned images + labels = np.array(image_list, dtype=np.uint8) + + #pyplot.imshow(labels[2]) + #pyplot.show() + + return labels + +# One hot encode label data and convert to 
numpy array +def process_labels (seg_data): + onehot_Y = [] + + # Iterate through all array turned images by shapes first value + for n in range(seg_data.shape[0]): + + # Get data at position in array + im = seg_data[n] + + # There are 4 classes + n_classes = 4 + + # Create 'im.shape[0] x im.shape[1] x n_classes' shaped array of arrays of arrays of zeros with type uint8 + one_hot = np.zeros((im.shape[0], im.shape[1], n_classes),dtype=np.uint8) + + # Iterate through sorted and unique arrays of given array turned image + for i, unique_value in enumerate(np.unique(im)): + # One hot each unique array with its numerical value of its entry in the dataset -> transform categorical into numerical dummy features + one_hot[:, :, i][im == unique_value] = 1 + # Append array to list + onehot_Y.append(one_hot) + + # Create an numpy array to hold all the array turned images + onehot_Y =np.array(onehot_Y) + #print (onehot_Y.dtype) + #print (np.unique(onehot_validate_Y)) + #print (onehot_Y.shape) + + return onehot_Y \ No newline at end of file diff --git a/recognition/Miller/modules.py b/recognition/Miller/modules.py new file mode 100644 index 0000000000..0ece4cbbfc --- /dev/null +++ b/recognition/Miller/modules.py @@ -0,0 +1,197 @@ +""" +“modules.py" containing the source code of the components of your model. Each component must be +implementated as a class or a function + +Based on Neural Discrete Representation Learning by van der Oord et al https://arxiv.org/pdf/1711.00937.pdf +and the given example on https://keras.io/examples/generative/vq_vae/ +""" +import tensorflow as tf + +"""CREATE STRUCTURE OF VQ-VAR MODEL""" + +""" +Class Representation of the Vector Quantization laye + +Structure is: + 1. Reshape into (n,h,w,d) + 2. Calculate L2-normalized distance between the inputs and the embeddings. -> (n*h*w, d) + 3. Argmin -> find minimum distance between indices for each n*w*h vector + 4. Index from dictionary: index the closest vector from the dictionary for each of n*h*w vectors + 5. Reshape into original shape (n, h, w, d) + 6. Copy gradients from q -> x +""" +class VectorQ_layer(tf.keras.layers.Layer): + def __init__(self, embedding_num, latent_dimension, beta=0.25, **kwargs): + super().__init__(**kwargs) + self.embedding_num = embedding_num + self.latent_dimension = latent_dimension + self.beta = beta + + # Initialize the embeddings which we will quantize. + w_init = tf.random_uniform_initializer() + self.embeddings = tf.Variable(initial_value=w_init(shape=(self.latent_dimension, self.embedding_num), dtype="float32"),trainable=True,name="embeddings_vqvae",) + + # Forward Pass behaviour. Takes Tensor as input + def call(self, x): + # Calculate the input shape and store for later -> Shape of (n,h,w,d) + input_shape = tf.shape(x) + + # Flatten the inputs to keep the embedding dimension intact. + # Combine all dimensions into last one 'd' -> (n*h*w, d) + flatten = tf.reshape(x, [-1, self.latent_dimension]) + + # Get code indices + # Calculate L2-normalized distance between the inputs and the embeddings. + # For each n*h*w vectors, we calculate the distance from each of k vectors of embedding dictionaty to obtain matrix of shape (n*h*w, k) + similarity = tf.matmul(flatten, self.embeddings) + distances = (tf.reduce_sum(flatten ** 2, axis=1, keepdims=True) + tf.reduce_sum(self.embeddings ** 2, axis=0) - 2 * similarity) + + # For each n*h*w vectors, find the indices of closest k vector from dictionary; find minimum distance. 
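+        # The 'distances' computation above uses the expansion ||z - e||^2 = ||z||^2 + ||e||^2 - 2*(z . e),
+        # so the full (n*h*w, k) distance matrix is obtained without tiling the flattened encoder outputs.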
+ encoded_indices = tf.argmin(distances, axis=1) + + # Turn the indices into a one hot encoded vectors; index the closest vector from the dictionary for each n*h*w vector + encodings = tf.one_hot(encoded_indices, self.embedding_num) + quantized = tf.matmul(encodings, self.embeddings, transpose_b=True) + + # Reshape the quantized values back to its original input shape -> (n,h,w,d) + quantized = tf.reshape(quantized, input_shape) + + """ LOSS CALCULATIONS """ + """ + COMMITMENT LOSS + Since volume of embedding spcae is dimensionless, it may grow arbitarily if embedding ei does not + train as fast as encoder parameters. Thus add a commitment loss to make sure encoder commits to an embedding + CODE BOOK LOSS + Gradients bypass embedding, so we use a dictionary learningn algorithm which uses l2 error to + move embedding vectors ei towards encoder output + + tf.stop_gradient -> no gradient flows through + """ + commitment_loss = tf.reduce_mean((tf.stop_gradient(quantized) - x) ** 2) + codebook_loss = tf.reduce_mean((quantized - tf.stop_gradient(x)) ** 2) + self.add_loss(self.beta * commitment_loss + codebook_loss) + # Straight-through estimator. + # Unable to back propragate as gradient wont flow through argmin. Hence copy gradient from qunatised to x + # During backpropagation, (quantized -x) wont be included in computation anf the gradient obtained will be copied for inputs + quantized = x + tf.stop_gradient(quantized - x) + + return quantized + +# Represents the VAE Structure +class VAE: + def __init__(self, embedding_num, latent_dimension, beta=0.25): + self.embedding_num = embedding_num + self.latent_dimension = latent_dimension + self.beta=beta + """ + Returns layered model for encoder architecture built from convolutional layers. + + activations: ReLU advised as other activations are not optimal for encoder/decoder quantization architecture. + e.g. Leaky ReLU activated models are difficult to train -> cause sporadic loss spikes that model struggles to recover from + """ + # Encoder Component + def encoder_component(self): + #2D Convolutional Layers + # filters -> dimesion of output space + # kernal_size -> convolution window size + # activation -> activation func used + # relu -> + # strides -> spaces convolution window moves vertically and horizontally + # padding -> "same" pads with zeros to maintain output size same as input size + inputs = tf.keras.Input(shape=(256, 256, 1)) + + layer = tf.keras.layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(inputs) + layer = tf.keras.layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(layer) + + outputs = tf.keras.layers.Conv2D(self.latent_dimension, 1, padding="same")(layer) + return tf.keras.Model(inputs, outputs, name="encoder") + + # Returns the vq Layer + def vq_layer(self): + return VectorQ_layer(self.embedding_num, self.latent_dimension, self.beta, name="vector_quantizer") + + """ + Returns the model for decoder architecture built from tranposed convolutional layers. + + activations: ReLU advised as other activations are not optimal for encoder/decoder quantization architecture. + e.g. 
Leaky ReLU activated models are difficult to train -> cause sporadic loss spikes that model struggles to recover from + """ + # Decoder Component + def decoder_component(self): + inputs = tf.keras.Input(shape=self.encoder_component().output.shape[1:]) + #2D Convolutional Transpose Layers + # filters -> dimesion of output space + # kernal_size -> convolution window size + # activation -> activation func used + # relu -> + # strides -> spaces convolution window moves vertically and horizontally + # padding -> "same" pads with zeros to maintain output size same as input size + layer = tf.keras.layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(inputs) + layer = tf.keras.layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(layer) + outputs = tf.keras.layers.Conv2DTranspose(1, 3, padding="same")(layer) + return tf.keras.Model(inputs, outputs, name="decoder") + + # Build Model + def build_model(self): + vq_layer = self.vq_layer() + encoder = self.encoder_component() + decoder = self.decoder_component() + + inputs = tf.keras.Input(shape=(256, 256, 1)) + encoder_outputs = encoder(inputs) + quantized_latents = vq_layer(encoder_outputs) + reconstructions = decoder(quantized_latents) + model = tf.keras.Model(inputs, reconstructions, name="vq_vae") + model.summary() + return model + +# Create a model instance and sets training paramters +class VQVAETRAINER(tf.keras.models.Model): + def __init__(self, variance, latent_dimension=32, embeddings_num=128, **kwargs): + + super(VQVAETRAINER, self).__init__(**kwargs) + self.latent_dimension = latent_dimension + self.embeddings_num = embeddings_num + self.variance = variance + + VAE_model = VAE(self.embeddings_num, self.latent_dimension) + self.vqvae_model = VAE_model.build_model() + + + self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss") + self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name="reconstruction_loss") + self.vq_loss_tracker = tf.keras.metrics.Mean(name="vq_loss") + + @property + def metrics(self): + # Model metrics -> returns losses (total loss, reconstruction loss and the vq_loss) + return [self.total_loss_tracker, self.reconstruction_loss_tracker, self.vq_loss_tracker] + + def train_step(self, x): + with tf.GradientTape() as tape: + # Outputs from the VQ-VAE. + reconstructions = self.vqvae_model(x) + + # Calculate the losses. + reconstruction_loss = (tf.reduce_mean((x - reconstructions) ** 2) / self.variance) + total_loss = reconstruction_loss + sum(self.vqvae_model.losses) + + # Backpropagation. + grads = tape.gradient(total_loss, self.vqvae_model.trainable_variables) + self.optimizer.apply_gradients(zip(grads, self.vqvae_model.trainable_variables)) + + # Loss tracking. + """CODEBOOK LOSS + COMMITMENT LOSS -> euclidean loss + encoder loss""" + self.total_loss_tracker.update_state(total_loss) + """RECONSTRUCTION ERROR (MSE) -> between input and reconstruction""" + self.reconstruction_loss_tracker.update_state(reconstruction_loss) + self.vq_loss_tracker.update_state(sum(self.vqvae_model.losses)) + + # Log results. + return { + "loss": self.total_loss_tracker.result(), + "reconstruction_loss": self.reconstruction_loss_tracker.result(), + "vqvae_loss": self.vq_loss_tracker.result(), + } + + diff --git a/recognition/Miller/predict.py b/recognition/Miller/predict.py new file mode 100644 index 0000000000..43d540db6e --- /dev/null +++ b/recognition/Miller/predict.py @@ -0,0 +1,113 @@ +""" +“predict.py" showing example usage of your trained model. 
Print out any results and / or provide visualisations where applicable
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+import modules as mod
+import dataset as data
+
+import warnings
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+# Show how well the program performs
+
+
+""" MODEL AND TRAIN VQ-VAE """
+# Load and pre-process the training and test data (paths match those used in train.py)
+train_X = data.process_training(data.load_training("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_train"))
+train_x_var = np.var(train_X)
+test_X = data.process_training(data.load_training("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_test"))
+
+# Build and train the VQ-VAE (same hyperparameters as train.py), then use the underlying vq_vae model
+latent_dimensions = 16   # dimensionality of each latent embedding vector
+embeddings_number = 128  # number of embeddings in the codebook
+model = mod.VQVAETRAINER(train_x_var, latent_dimensions, embeddings_number)
+model.compile(optimizer='adam')
+model.fit(train_X, epochs=15, batch_size=128)
+
+""" RECONSTRUCTION RESULTS"""
+# Plots the original image against the reconstructed one
+def plot_comparision_original_to_reconstructed(original, reconstructed):
+    plt.figure(figsize = (10,12))
+    plt.subplot(1, 2, 1)
+    plt.imshow(original.squeeze() + 0.5, cmap = 'gray')
+    plt.title("Original")
+    plt.axis("off")
+
+    plt.subplot(1, 2, 2)
+    plt.imshow(reconstructed.squeeze() + 0.5, cmap = 'gray')
+    plt.title("Reconstructed")
+    plt.axis("off")
+
+    plt.show()
+
+trained_model = model.vqvae_model
+idx = np.random.choice(len(test_X), 10)
+test_images = test_X[idx]
+reconstructions_test = trained_model.predict(test_images)
+
+for test_image, reconstructed_image in zip(test_images, reconstructions_test):
+    plot_comparision_original_to_reconstructed(test_image, reconstructed_image)
+
+# Return the average pixel value for the image and the reconstruction
+def calculate_mean(image, reconstructed_image):
+    image_pixel = 0
+    reconstructed_pixel = 0
+
+    for row in range(256):
+        for col in range(256):
+            image_pixel += image[row][col]
+            reconstructed_pixel += reconstructed_image[row][col]
+
+    image_pixel = image_pixel / (256**2)
+    reconstructed_pixel = reconstructed_pixel / (256**2)
+
+    return image_pixel, reconstructed_pixel
+
+# Returns std dev for the pixel value of each image
+def calculate_stddev(image, reconstructed_image, image_mean, reconstructed_image_mean):
+
+    image_variance = 0
+    reconstructed_image_variance = 0
+
+    for row in range(256):
+        for col in range(256):
+            image_variance += np.square(image[row][col] - image_mean)
+            reconstructed_image_variance += np.square(reconstructed_image[row][col] - reconstructed_image_mean)
+
+    image_variance = np.sqrt(image_variance/(256**2 - 1))
+    reconstructed_image_variance = np.sqrt(reconstructed_image_variance/(256**2 - 1))
+    return image_variance, reconstructed_image_variance
+
+# Returns the covariance for both images
+def calculate_covariance(image, reconstructed_image, image_mean, predicted_mean):
+    covariance_value = 0
+
+    for row in range(256):
+        for col in range(256):
+            covariance_value += (image[row][col] - image_mean)*(reconstructed_image[row][col] - predicted_mean)
+
+    return covariance_value/(256**2 - 1)
+
+
+# Return the structural similarity between two images; measures the window x and y of common size.
+# https://en.wikipedia.org/wiki/Structural_similarity
+def structural_similarity(mean_X, predicted_mean, stddev_X, predicted_stddev, covariance):
+    K1 = 0.01 # default value
+    K2 = 0.03 # default value
+    L = 1.0   # dynamic range of the pixel values (the data is rescaled to [0, 1] in pre-processing)
+    C1 = (K1 * L)**2
+    C2 = (K2 * L)**2
+    C3 = C2 / 2
+
+    luminance_x_y = (2*mean_X*predicted_mean + C1)/(mean_X**2+predicted_mean**2+C1)
+    contrast_x_y = (2*stddev_X*predicted_stddev + C2)/(stddev_X**2+predicted_stddev**2+C2)
+    structure_x_y = (covariance+C3)/(stddev_X*predicted_stddev+C3)
+    return luminance_x_y * contrast_x_y * structure_x_y
+
+# Returns the structured similarity for the entire data set
+def structural_similarity_mean(test_X, model):
+    structured_similarity_coef = 0
+
+    for i, data in enumerate(test_X):
+        # get reconstructed image (add a batch dimension before calling predict)
+        image_reconstruction = model.predict(data[np.newaxis, ...])
+        data = data[:,:,0]
+        image_reconstruction = image_reconstruction[0,:,:,0]
+
+        # Calculate structured similarity and add to total
+        mean_X, predicted_mean = calculate_mean(data, image_reconstruction)
+        stddev_X, predicted_stddev = calculate_stddev(data, image_reconstruction, mean_X, predicted_mean)
+        covariance = calculate_covariance(data, image_reconstruction, mean_X, predicted_mean)
+        structured_similarity_coef += structural_similarity(mean_X, predicted_mean, stddev_X, predicted_stddev, covariance)
+
+    return structured_similarity_coef / len(test_X)
+
+print(structural_similarity_mean(test_X, trained_model))
\ No newline at end of file
diff --git a/recognition/Miller/train.py b/recognition/Miller/train.py
new file mode 100644
index 0000000000..de4984a1e3
--- /dev/null
+++ b/recognition/Miller/train.py
@@ -0,0 +1,224 @@
+"""
+“train.py" containing the source code for training, validating, testing and saving your model. The model
+should be imported from “modules.py” and the data loader should be imported from “dataset.py”. Make
+sure to plot the losses and metrics during training
+"""
+# %%
+import dataset as data
+import modules as mod
+import matplotlib.pyplot as plt
+import numpy as np
+import tensorflow as tf
+
+# Download Data and then unzip
+#download_oasis()
+# %%
+
+""" PROCESS TRAINING DATA"""
+# Load the training data from the Oasis Data set
+train_X = data.load_training ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_train")
+
+# Check training image
+#pyplot.imshow(train_X[2])
+#pyplot.show()
+
+# Pre process training data set
+train_X = data.process_training(train_X)
+train_x_var = np.var(train_X)
+# Load the validation data from the Oasis Data set
+#validate_X = data.load_training ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_validate")
+
+# Pre process validation data set
+#validate_X = data.process_training(validate_X)
+
+# Load the test data from the Oasis Data Set
+test_X = data.load_training ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_test")
+
+# Pre process test data set
+test_X = data.process_training(test_X)
+
+""" PROCESS TRAINING LABELS DATA """
+# Load the segmented training labels data from the Oasis Data set
+train_Y = data.load_labels ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_seg_train")
+# Pre process training labels data
+train_Y = data.process_labels(train_Y)
+
+# Load the segmented validation labels data from the Oasis Data set
+#validate_Y = data.load_labels("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_seg_validate")
+# Pre process validation labels data
+#validate_Y = data.process_labels(validate_Y)
+
+# Load the segmented test labels data from the Oasis Data set
+test_Y = data.load_labels("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_seg_test")
+# Pre process test labels data
+test_Y = data.process_labels(test_Y)
+#%%
+""" MODEL AND TRAIN VQ-VAE """
+# Create an instance of the VQ-VAE model
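+# (VQVAETRAINER takes the variance of the training data as its first argument; its train_step
+# divides the reconstruction MSE by this variance when computing the loss.)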
+latent_dimensions = 16 #dimensionality if each latent embedding vector +embeddings_number = 128 #number of embeddings in the codebook + +model = mod.VQVAETRAINER(train_x_var, latent_dimensions, embeddings_number) + +""" +Optimiser -> learning rate +'adam' adjusts learning rate whilst training; learning rate deterines how fast optimal weights are calculated. Smaller +Learning rate = more wights but takes longer to compute +""" +# Create Model +model.compile (optimizer='adam') + +# Train model +history = model.fit(train_X, epochs=15, batch_size=128) +print("disaster!!!!") + +#%% +# Plot Loss +plt.plot(history.history['reconstruction_loss'], label='Reconstruction Loss') +plt.title('VQVAE Loss') +plt.xlabel('Epoch') +plt.ylabel('Loss') +plt.legend(['Train', 'Validation'], loc='upper left') +plt.show() + +#%% + +""" MODEL AND TRAIN VQ-VAE """ + +""" RECONSTRUCTION RESULTS""" +# Return the average pixel value for the image and the reconstruction +def calculate_mean(image, reconstructed_image): + image_pixel = 0 + reconstructed_pixel = 0 + + for row in range(256): + for col in range(256): + image_pixel += image[row][col] + reconstructed_pixel += reconstructed_image[row][col] + + image_pixel = image_pixel / (256**2) + reconstructed_pixel = reconstructed_pixel / (256**2) + + return image_pixel, reconstructed_image + +# Returns std dev for the pixel value of each image +def calculate_stddev(image, reconstructed_image, image_mean, reconstructed_image_mean): + + image_variance = 0 + reconstructed_image_variance = 0 + + for row in range(256): + for col in range(256): + image_variance += np.square(image[row][col] - image_mean) + reconstructed_image_variance += np.square(reconstructed_image[row][col] - reconstructed_image_mean) + + image_variance = np.sqrt(image_variance/(256**2 - 1)) + reconstructed_image_variance = np.sqrt(reconstructed_image_variance/(256**2 - 1)) + return image_variance, reconstructed_image_variance + +# Returns the covariance for both images +def calculate_covariance(image, reconstructed_image, image_mean, predicted_mean): + covariance_value = 0 + + for row in range(256): + for col in range(256): + covariance_value += (image[row][col] - image_mean)*(reconstructed_image[row][col] - predicted_mean) + + return covariance_value/(256**256-1) + + +# Return the structural similarity between two images; measures the window x and y of common size. +# https://en.wikipedia.org/wiki/Structural_similarity +def structural_similarity(mean_X, predicted_mean, stddev_X, predicted_stddev, covariance): + K1 = 0.01 # default value + K2 = 0.03 # default value + L = 255 # dynamic range of pixel value (2^bits per pixel -1) + C1 = (K1 * L)**2 + C2 = (K2 * L)**2 + C3 = C2 / 2 + + luminance_x_y = (2*mean_X*predicted_mean + C1)/(mean_X**2+predicted_mean**2+C1) + contrast_x_y = (2*stddev_X*predicted_stddev + C2)/(stddev_X**2+np. 
predicted_stddev**2+C2) + structure_x_y = (covariance+C3)/(stddev_X*predicted_stddev+C3) + return luminance_x_y * contrast_x_y * structure_x_y + +# Plots the original image against the reconstructed one with their Structured similarity rating +def plot_comparision_original_to_reconstructed(original, reconstructed, ssim): + plt.suptitle("Structured Similiarity Rating: %.2f" %ssim) + + #plt.figure(figsize = (10,12)) + plt.subplot(1, 2, 1) + plt.imshow(original.squeeze() + 0.5, cmap = 'gray') + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(reconstructed.squeeze() + 0.5, cmap = 'gray') + plt.title("Reconstructed") + plt.axis("off") + + plt.show() + +trained_model = model.vqvae_model + +# Select 5 random Test images +idx = np.random.choice(len(test_X), 5) +test_images = test_X[idx] +reconstructions_test = trained_model.predict(test_images) + +# Perform Predictions on the test images +for test_image, reconstructed_image in zip(test_images, reconstructions_test): + """mean, mean_r = calculate_mean(test_image, reconstructed_image) + stddev, stddev_r = calculate_stddev(test_image,reconstructed_image, mean, mean_r) + cov = calculate_covariance(test_image, reconstructed_image, mean, mean_r) + structured_similiarity_rating = structural_similarity(mean, mean_r, stddev, stddev_r, cov) + """ + structured_similiarity_rating = tf.image.ssim(test_image, reconstructed_image, max_val=1.0) + plot_comparision_original_to_reconstructed(test_image, reconstructed_image, structured_similiarity_rating) + + + +#%% + +# Returns the structured similarity for the entire data set +def structural_similarity_mean(test_X, model): + structured_similarity_coef = 0 + + for i, data in enumerate(test_X): + # get reconstructed image + image_reconstruction = model.predict(data) + data = data[0,:,:,0] + image_reconstruction = image_reconstruction[0,:,:,0] + + # Calculate structured similarity and add to total + mean_X, predicted_mean = calculate_mean(data, image_reconstruction) + stddev_X, predicted_stddev = calculate_stddev(data, image_reconstruction, mean_X, predicted_mean) + covariance = calculate_covariance(data, image_reconstruction, mean_X, predicted_mean) + structured_similarity_coef += structural_similarity(mean_X, predicted_mean, stddev_X, predicted_stddev, covariance) + + return structured_similarity_coef / len(test_X) + +# Calculate the mean structural Similarity for the reconstructed images +mean_structured_similiarity = structural_similarity_mean(test_X, trained_model) +print(mean_structured_similiarity) + +# %% +encoder = model.vqvae_model.get_layer("encoder") +quantizer = model.vqvae_model.get_layer("vector_quantizer") + +encoded_outputs = encoder.predict(test_images) +flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) +codebook_indices = quantizer.get_code_indices(flat_enc_outputs) +codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) + +for i in range(len(test_images)): + plt.subplot(1, 2, 1) + plt.imshow(test_images[i].squeeze() + 0.5) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(codebook_indices[i]) + plt.title("Code") + plt.axis("off") + plt.show() diff --git a/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 1.png b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 1.png new file mode 100644 index 0000000000..2e7d10022f Binary files /dev/null and b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 1.png differ diff --git a/recognition/s4581053 VQVAE 
OASIS/Images/Reconstructed Image 2.png b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 2.png
new file mode 100644
index 0000000000..758b52ca82
Binary files /dev/null and b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 2.png differ
diff --git a/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 3.png b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 3.png
new file mode 100644
index 0000000000..6fc49f8131
Binary files /dev/null and b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 3.png differ
diff --git a/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 4.png b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 4.png
new file mode 100644
index 0000000000..83a0eb8628
Binary files /dev/null and b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 4.png differ
diff --git a/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 5.png b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 5.png
new file mode 100644
index 0000000000..129515b21c
Binary files /dev/null and b/recognition/s4581053 VQVAE OASIS/Images/Reconstructed Image 5.png differ
diff --git a/recognition/s4581053 VQVAE OASIS/Images/VQVAE LOSS GRAPH.png b/recognition/s4581053 VQVAE OASIS/Images/VQVAE LOSS GRAPH.png
new file mode 100644
index 0000000000..b81d27e4a5
Binary files /dev/null and b/recognition/s4581053 VQVAE OASIS/Images/VQVAE LOSS GRAPH.png differ
diff --git a/recognition/s4581053 VQVAE OASIS/README.MD b/recognition/s4581053 VQVAE OASIS/README.MD
new file mode 100644
index 0000000000..bfd809679e
--- /dev/null
+++ b/recognition/s4581053 VQVAE OASIS/README.MD
@@ -0,0 +1,74 @@
+# Vector Quantized Variational Auto-encoder (VQ-VAE Model)
+
+In this report, a generative model, the Vector Quantized Variational AutoEncoder (VQ-VAE), was used to generate reconstructed images of the OASIS brain data set that are "reasonably clear" and have a Structural Similarity (SSIM) of over 0.6. The VQ-VAE was implemented using TensorFlow Keras.
+
+#### Description of the VQ-VAE Algorithm
+![](https://miro.medium.com/max/1400/1*yRdNe3xi4f3KV6ULW7yArA.png)
+>Figure 1: Graphical representation of a VQ-VAE network.
+
+A standard VAE (encoder->decoder) uses a continuous latent space that is sampled from a Gaussian distribution, which makes the latent distribution hard to learn with gradient descent. In comparison, the VQ-VAE uses a discrete latent space and consists of three parts, as seen above:
+
+1. Encoder:
+    * Convolutional network that downsamples the features of an image
+2. Latent Space:
+    * Codebook consisting of n latent embedding vectors, each of dimension D
+    * Each encoder output is compared against every embedding (Euclidean distance) -> outputs the closest embedding vector
+    * The closest codebook vector for each encoder output is passed to the decoder as input
+3. Decoder:
+    * Convolutional network to upsample and generate reconstructed samples.
+
+#### ==============Oasis Brain Data Set==============
+![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRl7czOsj3uzWRQ6NT2ofed7QBsKiqrUq6Bsw&usqp=CAU)
+>Figure 2: Comparison of an image stored in the train vs test data sets
+
+The OASIS MRI dataset contains 9664 training images, 544 test images and 1120 validation images. An example of train and test data is shown above. The images are preloaded into a file location and extracted from there for processing.
+
+##### Data Pre-Processing
+
+Before the data was used, it was normalised through residual extraction and rescaling. This makes it easier to compare distributions with different means and scales while maintaining the shape of each distribution.
+
+## ==============Training==============
+
+The three data groups - train, validate, and test - are split roughly 0.85/0.10/0.05. The training set contains the most images so the model has enough information to learn from to produce accurate reconstructions later. The test set is used to validate these reconstructions. The validation set is not required, as the model is judged by the quality of the reconstructions on the test set. The model is trained for 5 epochs with a batch size of 128.
+
+![](/recognition/s4581053%20VQVAE%20OASIS/Images/VQVAE%20LOSS%20GRAPH.png)
+
+>Figure 3: VQVAE reconstruction loss over 5 epochs
+
+## ==============Results==============
+
+Below are 5 randomly chosen reconstructed images and their comparison to the corresponding test images.
+
+NOTE: The images below and the average SSIM rating were originally produced with 15 epochs, NOT 5.
+
+![](/recognition/s4581053%20VQVAE%20OASIS/Images/Reconstructed%20Image%201.png)
+
+![](/recognition/s4581053%20VQVAE%20OASIS/Images/Reconstructed%20Image%202.png)
+
+![](/recognition/s4581053%20VQVAE%20OASIS/Images/Reconstructed%20Image%203.png)
+
+![](/recognition/s4581053%20VQVAE%20OASIS/Images/Reconstructed%20Image%204.png)
+
+![](/recognition/s4581053%20VQVAE%20OASIS/Images/Reconstructed%20Image%205.png)
+
+>Figure 4: 5 randomly chosen reconstructed images and their SSIM values compared to their original images
+
+The reconstructed images achieved a mean Structural Similarity of 0.734.
+
+## Dependencies
+* Python 3.7
+* TensorFlow 2.6.0
+* Numpy 1.19.5
+* matplotlib 3.2.2
+* Pillow 7.1.2
+* os
+* Pre-processed OASIS MRI dataset (accessible at https://cloudstor.aarnet.edu.au/plus/s/n5aZ4XX1WBKp6HZ/download).
+
+## References
+[1] A. v. d. Oord, O. Vinyals, and K. Kavukcuoglu, 2018. Neural Discrete Representation Learning. [Online]. Available at: https://arxiv.org/pdf/1711.00937.pdf.
+
+[2] Paul, S., 2021. Keras documentation: Vector-Quantized Variational Autoencoders. [Online] Keras.io. Available at: https://keras.io/examples/generative/vq_vae/.
+
+[3] https://github.com/shakes76/PatternFlow/tree/master/recognition/MySolution
+
+[4] https://keras.io/examples/generative/pixelcnn/
diff --git a/recognition/s4581053 VQVAE OASIS/dataset.py b/recognition/s4581053 VQVAE OASIS/dataset.py
new file mode 100644
index 0000000000..35eeab7b0f
--- /dev/null
+++ b/recognition/s4581053 VQVAE OASIS/dataset.py
@@ -0,0 +1,122 @@
+"""
+"dataset.py" containing the data loader for loading and preprocessing your data
+
+This file utilises and modifies the functions found in https://github.com/shakes76/PatternFlow/tree/master/recognition/MySolution
+"""
+
+import tensorflow as tf
+import glob
+import numpy as np
+from matplotlib import image
+import os
+from PIL import Image
+
+
+# Download the Oasis Data as zip file.
Will need to extract it manually afterwards +def download_oasis (): + + dataset_url = "https://cloudstor.aarnet.edu.au/plus/s/n5aZ4XX1WBKp6HZ/download" + + # Download file from URL Path, origin=path, fname=file name, untar=compress file + tf.keras.utils.get_file(origin=dataset_url,fname='oa-sis' ,untar=True) + +# Loads the training images (non segmented) from given path and returns an numpy array of arrays +def load_training (path): + + image_list = [] + # Iterate through all paths and convert to 'png' + for filename in glob.glob(path + '/*.png'): + # Read an image from the given filename into an array + im = image.imread (filename) + # Append array to list + image_list.append(im) + + print('train_X shape:', np.array(image_list).shape) + + # Create an numpy array to hold all the array turned images + train_set = np.array(image_list, dtype=np.float32) + + + return train_set + +# Normalizes training images and adds 4th dimention +def process_training (data_set): + + """ Residual Extraction -> Useful for comparing distributions with different means but similar shapes""" + # Calculate the residuals of the data - each residual is dist from each distribution mean which is now zero + data_set = (data_set - np.mean(data_set)) / np.std(data_set) + """ Min-Max Rescaling -> Useful for comparign distributions with different scales or different shapes""" + # Rescale Data - ratio of dist of each value from min value in each dataset to range of values in each dataset -> value between (0,1) now + # Forces dataset to be same scale, and perseves shape of distribution -> "Squeezed and shifted to fit between 0 and 1" + data_set= (data_set - np.amin(data_set)) / np.amax(data_set - np.amin(data_set)) + # Add 4th dimension + data_set = data_set [:,:,:,np.newaxis] + + return data_set + +# Loads labels images from given path and map pixel values to class indices and convert image data type to unit8 +def load_labels (path): + image_list =[] + + # Iterate through all paths and convert to 'png' + for filename in glob.glob(path+'/*.png'): + # Read an image from the given filename into an array + im=image.imread (filename) + # Create 'im.shape[0] x im.shape[1]' shaped array of arrays of zeros + one_hot = np.zeros((im.shape[0], im.shape[1])) + # Iterate through sorted and unique arrays of given array turned image + for i, unique_value in enumerate(np.unique(im)): + # One hot each unique array with its numerical value of its entry in the dataset -> transform categorical into numerical dummy features + one_hot[:, :][im == unique_value] = i + # Append array to list + image_list.append(one_hot) + + print('train_y shape:',np.array(image_list).shape) + + # Create an numpy array to hold all the array turned images + labels = np.array(image_list, dtype=np.uint8) + + #pyplot.imshow(labels[2]) + #pyplot.show() + + return labels + +# One hot encode label data and convert to numpy array +def process_labels (seg_data): + onehot_Y = [] + + # Iterate through all array turned images by shapes first value + for n in range(seg_data.shape[0]): + + # Get data at position in array + im = seg_data[n] + + # There are 4 classes + n_classes = 4 + + # Create 'im.shape[0] x im.shape[1] x n_classes' shaped array of arrays of arrays of zeros with type uint8 + one_hot = np.zeros((im.shape[0], im.shape[1], n_classes),dtype=np.uint8) + + # Iterate through sorted and unique arrays of given array turned image + for i, unique_value in enumerate(np.unique(im)): + # One hot each unique array with its numerical value of its entry in the dataset -> transform 
categorical into numerical dummy features + one_hot[:, :, i][im == unique_value] = 1 + # Append array to list + onehot_Y.append(one_hot) + + # Create an numpy array to hold all the array turned images + onehot_Y =np.array(onehot_Y) + #print (onehot_Y.dtype) + #print (np.unique(onehot_validate_Y)) + #print (onehot_Y.shape) + + return onehot_Y + +def codebook_indice_generator(data, encoder, quantizer): + encoded_outputs = encoder.predict(data) + flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) + codebook_indices = quantizer.get_code_indices(flat_enc_outputs) + + codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) + print(f"Shape of the training data for PixelCNN: {codebook_indices.shape}") + return codebook_indices \ No newline at end of file diff --git a/recognition/s4581053 VQVAE OASIS/modules.py b/recognition/s4581053 VQVAE OASIS/modules.py new file mode 100644 index 0000000000..302a566212 --- /dev/null +++ b/recognition/s4581053 VQVAE OASIS/modules.py @@ -0,0 +1,286 @@ +""" +“modules.py" containing the source code of the components of your model. Each component must be +implementated as a class or a function + +Based on Neural Discrete Representation Learning by van der Oord et al https://arxiv.org/pdf/1711.00937.pdf +and the given example on https://keras.io/examples/generative/vq_vae/ and +https://keras.io/examples/generative/pixelcnn/ +""" +import tensorflow as tf + +"""CREATE STRUCTURE OF VQ-VAR MODEL""" + +""" +Class Representation of the Vector Quantization laye + +Structure is: + 1. Reshape into (n,h,w,d) + 2. Calculate L2-normalized distance between the inputs and the embeddings. -> (n*h*w, d) + 3. Argmin -> find minimum distance between indices for each n*w*h vector + 4. Index from dictionary: index the closest vector from the dictionary for each of n*h*w vectors + 5. Reshape into original shape (n, h, w, d) + 6. Copy gradients from q -> x +""" +class VectorQ_layer(tf.keras.layers.Layer): + def __init__(self, embedding_num, latent_dimension, beta=0.25, **kwargs): + super().__init__(**kwargs) + self.embedding_num = embedding_num + self.latent_dimension = latent_dimension + self.beta = beta + + # Initialize the embeddings which we will quantize. + w_init = tf.random_uniform_initializer() + self.embeddings = tf.Variable(initial_value=w_init(shape=(self.latent_dimension, self.embedding_num), dtype="float32"),trainable=True,name="embeddings_vqvae",) + + # Forward Pass behaviour. Takes Tensor as input + def call(self, x): + # Calculate the input shape and store for later -> Shape of (n,h,w,d) + input_shape = tf.shape(x) + + # Flatten the inputs to keep the embedding dimension intact. + # Combine all dimensions into last one 'd' -> (n*h*w, d) + flatten = tf.reshape(x, [-1, self.latent_dimension]) + + # Get code indices + # Calculate L2-normalized distance between the inputs and the embeddings. + # For each n*h*w vectors, we calculate the distance from each of k vectors of embedding dictionaty to obtain matrix of shape (n*h*w, k) + similarity = tf.matmul(flatten, self.embeddings) + distances = (tf.reduce_sum(flatten ** 2, axis=1, keepdims=True) + tf.reduce_sum(self.embeddings ** 2, axis=0) - 2 * similarity) + + # For each n*h*w vectors, find the indices of closest k vector from dictionary; find minimum distance. 
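+        # The 'distances' computation above uses the expansion ||z - e||^2 = ||z||^2 + ||e||^2 - 2*(z . e),
+        # so the full (n*h*w, k) distance matrix is obtained without tiling the flattened encoder outputs.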
+ encoded_indices = tf.argmin(distances, axis=1) + + # Turn the indices into a one hot encoded vectors; index the closest vector from the dictionary for each n*h*w vector + encodings = tf.one_hot(encoded_indices, self.embedding_num) + quantized = tf.matmul(encodings, self.embeddings, transpose_b=True) + + # Reshape the quantized values back to its original input shape -> (n,h,w,d) + quantized = tf.reshape(quantized, input_shape) + + """ LOSS CALCULATIONS """ + """ + COMMITMENT LOSS + Since volume of embedding spcae is dimensionless, it may grow arbitarily if embedding ei does not + train as fast as encoder parameters. Thus add a commitment loss to make sure encoder commits to an embedding + CODE BOOK LOSS + Gradients bypass embedding, so we use a dictionary learningn algorithm which uses l2 error to + move embedding vectors ei towards encoder output + + tf.stop_gradient -> no gradient flows through + """ + commitment_loss = tf.reduce_mean((tf.stop_gradient(quantized) - x) ** 2) + codebook_loss = tf.reduce_mean((quantized - tf.stop_gradient(x)) ** 2) + self.add_loss(self.beta * commitment_loss + codebook_loss) + # Straight-through estimator. + # Unable to back propragate as gradient wont flow through argmin. Hence copy gradient from qunatised to x + # During backpropagation, (quantized -x) wont be included in computation anf the gradient obtained will be copied for inputs + quantized = x + tf.stop_gradient(quantized - x) + + return quantized + + def get_code_indices(self, inputs): + # Get code indices + # Calculate L2-normalized distance between the inputs and the embeddings. + # For each n*h*w vectors, we calculate the distance from each of k vectors of embedding dictionaty to obtain matrix of shape (n*h*w, k) + similarity = tf.matmul(inputs, self.embeddings) + distances = (tf.reduce_sum(inputs ** 2, axis=1, keepdims=True) + tf.reduce_sum(self.embeddings ** 2, axis=0) - 2 * similarity) + + # For each n*h*w vectors, find the indices of closest k vector from dictionary; find minimum distance. + encoded_indices = tf.argmin(distances, axis=1) + return encoded_indices + +# Represents the VAE Structure +class VAE: + def __init__(self, embedding_num, latent_dimension, beta=0.25): + self.embedding_num = embedding_num + self.latent_dimension = latent_dimension + self.beta=beta + """ + Returns layered model for encoder architecture built from convolutional layers. + + activations: ReLU advised as other activations are not optimal for encoder/decoder quantization architecture. + e.g. 
Leaky ReLU activated models are difficult to train -> cause sporadic loss spikes that model struggles to recover from + """ + # Encoder Component + def encoder_component(self): + #2D Convolutional Layers + # filters -> dimesion of output space + # kernal_size -> convolution window size + # activation -> activation func used + # relu -> + # strides -> spaces convolution window moves vertically and horizontally + # padding -> "same" pads with zeros to maintain output size same as input size + inputs = tf.keras.Input(shape=(256, 256, 1)) + + layer = tf.keras.layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(inputs) + layer = tf.keras.layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(layer) + + outputs = tf.keras.layers.Conv2D(self.latent_dimension, 1, padding="same")(layer) + return tf.keras.Model(inputs, outputs, name="encoder") + + # Returns the vq Layer + def vq_layer(self): + return VectorQ_layer(self.embedding_num, self.latent_dimension, self.beta, name="vector_quantizer") + + """ + Returns the model for decoder architecture built from tranposed convolutional layers. + + activations: ReLU advised as other activations are not optimal for encoder/decoder quantization architecture. + e.g. Leaky ReLU activated models are difficult to train -> cause sporadic loss spikes that model struggles to recover from + """ + # Decoder Component + def decoder_component(self): + inputs = tf.keras.Input(shape=self.encoder_component().output.shape[1:]) + #2D Convolutional Transpose Layers + # filters -> dimesion of output space + # kernal_size -> convolution window size + # activation -> activation func used + # relu -> + # strides -> spaces convolution window moves vertically and horizontally + # padding -> "same" pads with zeros to maintain output size same as input size + layer = tf.keras.layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(inputs) + layer = tf.keras.layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(layer) + outputs = tf.keras.layers.Conv2DTranspose(1, 3, padding="same")(layer) + return tf.keras.Model(inputs, outputs, name="decoder") + + # Build Model + def build_model(self): + vq_layer = self.vq_layer() + encoder = self.encoder_component() + decoder = self.decoder_component() + + inputs = tf.keras.Input(shape=(256, 256, 1)) + encoder_outputs = encoder(inputs) + quantized_latents = vq_layer(encoder_outputs) + reconstructions = decoder(quantized_latents) + model = tf.keras.Model(inputs, reconstructions, name="vq_vae") + model.summary() + return model + +# Create a model instance and sets training paramters +class VQVAETRAINER(tf.keras.models.Model): + def __init__(self, variance, latent_dimension=32, embeddings_num=128, **kwargs): + + super(VQVAETRAINER, self).__init__(**kwargs) + self.latent_dimension = latent_dimension + self.embeddings_num = embeddings_num + self.variance = variance + + VAE_model = VAE(self.embeddings_num, self.latent_dimension) + self.vqvae_model = VAE_model.build_model() + + + self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss") + self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name="reconstruction_loss") + self.vq_loss_tracker = tf.keras.metrics.Mean(name="vq_loss") + + @property + def metrics(self): + # Model metrics -> returns losses (total loss, reconstruction loss and the vq_loss) + return [self.total_loss_tracker, self.reconstruction_loss_tracker, self.vq_loss_tracker] + + def train_step(self, x): + with tf.GradientTape() as tape: + # Outputs from the 
VQ-VAE. + reconstructions = self.vqvae_model(x) + + # Calculate the losses. + reconstruction_loss = (tf.reduce_mean((x - reconstructions) ** 2) / self.variance) + total_loss = reconstruction_loss + sum(self.vqvae_model.losses) + + # Backpropagation. + grads = tape.gradient(total_loss, self.vqvae_model.trainable_variables) + self.optimizer.apply_gradients(zip(grads, self.vqvae_model.trainable_variables)) + + # Loss tracking. + """CODEBOOK LOSS + COMMITMENT LOSS -> euclidean loss + encoder loss""" + self.total_loss_tracker.update_state(total_loss) + """RECONSTRUCTION ERROR (MSE) -> between input and reconstruction""" + self.reconstruction_loss_tracker.update_state(reconstruction_loss) + self.vq_loss_tracker.update_state(sum(self.vqvae_model.losses)) + + # Log results. + return { + "loss": self.total_loss_tracker.result(), + "reconstruction_loss": self.reconstruction_loss_tracker.result(), + "vqvae_loss": self.vq_loss_tracker.result(), + } + +""" +# The first layer is the PixelCNN layer. This layer simply +# builds on the 2D convolutional layer, but includes masking. +class PixelConvLayer(tf.keras.layers.Layer): + def __init__(self, mask_type, **kwargs): + super(PixelConvLayer, self).__init__() + self.mask_type = mask_type + self.conv = tf.keras.layers.Conv2D(**kwargs) + + def build(self, input_shape): + # Build the conv2d layer to initialize kernel variables + self.conv.build(input_shape) + # Use the initialized kernel to create the mask + kernel_shape = self.conv.kernel.get_shape() + self.mask = np.zeros(shape=kernel_shape) + self.mask[: kernel_shape[0] // 2, ...] = 1.0 + self.mask[kernel_shape[0] // 2, : kernel_shape[1] // 2, ...] = 1.0 + if self.mask_type == "B": + self.mask[kernel_shape[0] // 2, kernel_shape[1] // 2, ...] = 1.0 + + def call(self, inputs): + self.conv.kernel.assign(self.conv.kernel * self.mask) + return self.conv(inputs) + + # Next, we build our residual block layer. +# This is just a normal residual block, but based on the PixelConvLayer. 
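+# (Note: the residual add at the end of call() requires the block output to have the same number of
+# channels as 'inputs', so 'filters' must match the channel count of the incoming tensor.)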
+class ResidualBlock(tf.keras.layers.Layer): + def __init__(self, filters, **kwargs): + super(ResidualBlock, self).__init__(**kwargs) + self.conv1 = tf.keras.layers.Conv2D( + filters=filters, kernel_size=1, activation="relu" + ) + self.pixel_conv = PixelConvLayer( + mask_type="B", + filters=filters // 2, + kernel_size=3, + activation="relu", + padding="same", + ) + self.conv2 = tf.keras.layers.Conv2D( + filters=filters, kernel_size=1, activation="relu" + ) + + def call(self, inputs): + x = self.conv1(inputs) + x = self.pixel_conv(x) + x = self.conv2(x) + return tf.keras.layers.add([inputs, x]) + + +def pixel_model(pixelcnn_input_shape, residualblock_num, pixelcnn_layers, model): + pixelcnn_inputs = tf.keras.Input(shape=pixelcnn_input_shape, dtype=tf.int32) + ohe = tf.one_hot(pixelcnn_inputs, model.embeddings_num) + x = PixelConvLayer( + mask_type="A", filters=128, kernel_size=7, activation="relu", padding="same" + )(ohe) + + for _ in range(residualblock_num): + x = ResidualBlock(filters=128)(x) + + for _ in range(pixelcnn_layers): + x = PixelConvLayer( + mask_type="B", + filters=128, + kernel_size=1, + strides=1, + activation="relu", + padding="valid", + )(x) + + out = tf.keras.layers.Conv2D( + filters=model.embeddings_num, kernel_size=1, strides=1, padding="valid" + )(x) + + pixel_cnn = tf.keras.Model(pixelcnn_inputs, out, name="pixel_cnn") + pixel_cnn.summary() + return pixel_cnn +""" \ No newline at end of file diff --git a/recognition/s4581053 VQVAE OASIS/predict.py b/recognition/s4581053 VQVAE OASIS/predict.py new file mode 100644 index 0000000000..ccf3a60769 --- /dev/null +++ b/recognition/s4581053 VQVAE OASIS/predict.py @@ -0,0 +1,67 @@ +""" +“predict.py" showing example usage of your trained model. Print out any results and / or provide visualisations where applicable +""" +import numpy as np +import matplotlib.pyplot as plt +import modules as mod +import dataset as data + +""" MODEL AND TRAIN VQ-VAE """ +# Load the training data from the Oasis Data set +train_X = data.load_training ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_train") +train_X = data.process_training(train_X) +train_x_var = np.var(train_X) + +# Load the test data from the oasis Data Set +test_X = data.load_training ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_test") + +# Pre process test data set +test_X = data.process_training(test_X) + +""" RECONSTRUCTION RESULTS""" +latent_dimensions = 16 #dimensionality if each latent embedding vector +embeddings_number = 128 #number of embeddings in the codebook +# load model +model = mod.VQVAETRAINER(train_x_var, latent_dimensions, embeddings_number) +# Create Model +model.compile (optimizer='adam') + +# Train model +history = model.fit(train_X, epochs=5, batch_size=128) + +# Plot Loss +plt.plot(history.history['reconstruction_loss'], label='Reconstruction Loss') +plt.title('VQVAE Loss') +plt.xlabel('Epoch') +plt.ylabel('Loss') +plt.legend(['Train', 'Validation'], loc='upper left') +plt.show() + +# Plots the original image against the reconstructed one +def plot_comparision_original_to_reconstructed(original, reconstructed): + + plt.figure(figsize = (10,12)) + plt.subplot(1, 2, 1) + plt.imshow(original.squeeze() + 0.5, cmap = 'gray') + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(reconstructed.squeeze() + 0.5, cmap = 'gray') + plt.title("Reconstructed") + plt.axis("off") + + plt.show() + +trained_model = model.vqvae_model + +# Select 5 random Test images 
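+# (np.random.choice samples with replacement by default, so the same test image may be selected
+# more than once; pass replace=False if five distinct images are required.)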
+idx = np.random.choice(len(test_X), 5) +test_images = test_X[idx] +reconstructions_test = trained_model.predict(test_images) + +# Perform Predictions on the test images +for test_image, reconstructed_image in zip(test_images, reconstructions_test): + + plot_comparision_original_to_reconstructed(test_image, reconstructed_image) + diff --git a/recognition/s4581053 VQVAE OASIS/train.py b/recognition/s4581053 VQVAE OASIS/train.py new file mode 100644 index 0000000000..046e882cd6 --- /dev/null +++ b/recognition/s4581053 VQVAE OASIS/train.py @@ -0,0 +1,292 @@ +""" +“train.py" containing the source code for training, validating, testing and saving your model. The model +should be imported from “modules.py” and the data loader should be imported from “dataset.py”. Make +sure to plot the losses and metrics during training +""" +# %% +import dataset as data +import modules as mod +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +# Download Data and then unzip +#download_oasis() +# %% + +""" PROCESS TRAINING DATA""" +# Load the training data from the Oasis Data set +train_X = data.load_training ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_train") + +# Check training image +#pyplot.imshow(train_X[2]) +#pyplot.show() + +# Pre process training data set +train_X = data.process_training(train_X) +train_x_var = np.var(train_X) +# Load the validaton data from the oasis Data set +#validate_X = data.load_training ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_validate") + +# Pre process validation data set +#validate_X = data.process_training(validate_X) + +# Load the test data from the oasis Data Set +test_X = data.load_training ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_test") + +# Pre process test data set +test_X = data.process_training(test_X) + +""" PROCESS TRAINING LABELS DATA """ +# Load the segmented training labels data from the Oasis Data set +train_Y = data.load_labels ("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_seg_train") +# Pre process training labels data +train_Y = data.process_labels(train_Y) + +# Load the segmented validation labels data from the Oasis Data set +#validate_Y = data.load_labels("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_seg_validate") +# Pre process validation labels data +#validate_Y = data.process_labels(validate_Y) + +# Load the segmented test labels data from the Oasis Data set +test_Y = data.load_labels("C:/Users/dapmi/OneDrive/Desktop/Data/oa-sis.tar/keras_png_slices_data/keras_png_slices_seg_test") +# Pre process test labels data +test_Y = data.process_labels(test_Y) +#%% +""" MODEL AND TRAIN VQ-VAE """ +# Create a instance of the VQ-VAE model +latent_dimensions = 16 #dimensionality if each latent embedding vector +embeddings_number = 128 #number of embeddings in the codebook + +model = mod.VQVAETRAINER(train_x_var, latent_dimensions, embeddings_number) + +""" +Optimiser -> learning rate +'adam' adjusts learning rate whilst training; learning rate deterines how fast optimal weights are calculated. 
+
+# Train the model
+history = model.fit(train_X, epochs=5, batch_size=128)
+
+#%%
+# Plot the training losses
+plt.plot(history.history['reconstruction_loss'], label='Reconstruction Loss')
+plt.plot(history.history['loss'], label='Total Loss')
+plt.plot(history.history['vqvae_loss'], label='VQ Loss')
+plt.title('VQ-VAE Training Losses')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.legend(loc='upper right')
+plt.show()
+
+#%%
+
+""" RECONSTRUCTION RESULTS"""
+# Return the average pixel value for the image and the reconstruction
+def calculate_mean(image, reconstructed_image):
+    image_pixel = 0
+    reconstructed_pixel = 0
+
+    for row in range(256):
+        for col in range(256):
+            image_pixel += image[row][col]
+            reconstructed_pixel += reconstructed_image[row][col]
+
+    image_pixel = image_pixel / (256**2)
+    reconstructed_pixel = reconstructed_pixel / (256**2)
+
+    return image_pixel, reconstructed_pixel
+
+# Returns the sample standard deviation of the pixel values of each image
+def calculate_stddev(image, reconstructed_image, image_mean, reconstructed_image_mean):
+
+    image_variance = 0
+    reconstructed_image_variance = 0
+
+    for row in range(256):
+        for col in range(256):
+            image_variance += np.square(image[row][col] - image_mean)
+            reconstructed_image_variance += np.square(reconstructed_image[row][col] - reconstructed_image_mean)
+
+    image_variance = np.sqrt(image_variance/(256**2 - 1))
+    reconstructed_image_variance = np.sqrt(reconstructed_image_variance/(256**2 - 1))
+    return image_variance, reconstructed_image_variance
+
+# Returns the covariance between the two images
+def calculate_covariance(image, reconstructed_image, image_mean, predicted_mean):
+    covariance_value = 0
+
+    for row in range(256):
+        for col in range(256):
+            covariance_value += (image[row][col] - image_mean)*(reconstructed_image[row][col] - predicted_mean)
+
+    return covariance_value/(256**2 - 1)
+
+
+# Return the structural similarity between two images; compares windows x and y of common size.
+# https://en.wikipedia.org/wiki/Structural_similarity
+def structural_similarity(mean_X, predicted_mean, stddev_X, predicted_stddev, covariance):
+    K1 = 0.01 # default value
+    K2 = 0.03 # default value
+    L = 255   # dynamic range of the pixel values (2^bits per pixel - 1); use 1.0 for images rescaled to [0, 1]
+    C1 = (K1 * L)**2
+    C2 = (K2 * L)**2
+    C3 = C2 / 2
+
+    luminance_x_y = (2*mean_X*predicted_mean + C1)/(mean_X**2 + predicted_mean**2 + C1)
+    contrast_x_y = (2*stddev_X*predicted_stddev + C2)/(stddev_X**2 + predicted_stddev**2 + C2)
+    structure_x_y = (covariance + C3)/(stddev_X*predicted_stddev + C3)
+    return luminance_x_y * contrast_x_y * structure_x_y
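+
+# Editorial sketch: the per-pixel loops above can be vectorised with NumPy; this
+# alternative (not part of the original submission) computes the same mean, sample
+# standard deviation and covariance in one pass and should agree with the functions above.
+def image_statistics(image, reconstructed_image):
+    x = np.asarray(image, dtype=np.float64).squeeze()
+    y = np.asarray(reconstructed_image, dtype=np.float64).squeeze()
+    mean_x, mean_y = x.mean(), y.mean()
+    stddev_x, stddev_y = x.std(ddof=1), y.std(ddof=1)
+    covariance = ((x - mean_x) * (y - mean_y)).sum() / (x.size - 1)
+    return mean_x, mean_y, stddev_x, stddev_y, covariance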
+
+# Plots the original image against the reconstructed one with their structural similarity rating
+def plot_comparision_original_to_reconstructed(original, reconstructed, ssim):
+    plt.suptitle("Structured Similarity Rating: %.2f" % ssim)
+
+    #plt.figure(figsize = (10,12))
+    plt.subplot(1, 2, 1)
+    plt.imshow(original.squeeze() + 0.5, cmap = 'gray')
+    plt.title("Original")
+    plt.axis("off")
+
+    plt.subplot(1, 2, 2)
+    plt.imshow(reconstructed.squeeze() + 0.5, cmap = 'gray')
+    plt.title("Reconstructed")
+    plt.axis("off")
+
+    plt.show()
+
+trained_model = model.vqvae_model
+
+# Select 5 random test images
+idx = np.random.choice(len(test_X), 5)
+test_images = test_X[idx]
+reconstructions_test = trained_model.predict(test_images)
+ssim_mean = 0
+# Perform predictions on the test images and compute the SSIM of each reconstruction
+for test_image, reconstructed_image in zip(test_images, reconstructions_test):
+    """mean, mean_r = calculate_mean(test_image, reconstructed_image)
+    stddev, stddev_r = calculate_stddev(test_image, reconstructed_image, mean, mean_r)
+    cov = calculate_covariance(test_image, reconstructed_image, mean, mean_r)
+    structured_similiarity_rating = structural_similarity(mean, mean_r, stddev, stddev_r, cov)
+    """
+    structured_similiarity_rating = tf.image.ssim(test_image, reconstructed_image, max_val=1.0)
+    plot_comparision_original_to_reconstructed(test_image, reconstructed_image, structured_similiarity_rating)
+    ssim_mean += structured_similiarity_rating
+    print(structured_similiarity_rating)
+
+# Calculate the mean structural similarity of the reconstructed images
+print("The average structured similarity rating is: ", ssim_mean/len(test_images))
+
+
+"""Visualizing the discrete codes"""
+"""
+encoder = model.vqvae_model.get_layer("encoder")
+quantizer = model.vqvae_model.get_layer("vector_quantizer")
+
+encoded_outputs = encoder.predict(test_images)
+flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1])
+codebook_indices = quantizer.get_code_indices(flat_enc_outputs)
+codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1])
+
+for i in range(len(test_images)):
+    plt.subplot(1, 2, 1)
+    plt.imshow(test_images[i].squeeze() + 0.5)
+    plt.title("Original")
+    plt.axis("off")
+
+    plt.subplot(1, 2, 2)
+    plt.imshow(codebook_indices[i])
+    plt.title("Code")
+    plt.axis("off")
+    plt.show()
+
+"""
+
+""" PIXELCNN Hyperparameters"""
+"""
+residualblock_num = 2
+pixelcnn_layers = 2
+pixelcnn_input_shape = encoded_outputs.shape[1:-1]
+print(f"Input shape of the PixelCNN: {pixelcnn_input_shape}")
+
+
+pixel_model = mod.pixel_model(pixelcnn_input_shape, residualblock_num, pixelcnn_layers, model)
+
+"""
+
+""" DATA PREPARATION"""
+"""
+# Generate the codebook indices.
+codebook_indices = data.codebook_indice_generator(train_X, encoder, quantizer)
+
+"""
+""" PixelCNN TRAINING"""
+"""
+pixel_model.compile(
+    optimizer=tf.keras.optimizers.Adam(3e-4),
+    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    metrics=["accuracy"],
+)
+pixel_model.fit(
+    x=codebook_indices,
+    y=codebook_indices,
+    batch_size=128,
+    epochs=5,
+    validation_split=0.1,
+)
+
+# Create a mini sampler model (requires: import tensorflow_probability as tfp).
+inputs = tf.keras.layers.Input(shape=pixel_model.input_shape[1:])
+outputs = pixel_model(inputs, training=False)
+categorical_layer = tfp.layers.DistributionLambda(tfp.distributions.Categorical)
+outputs = categorical_layer(outputs)
+sampler = tf.keras.Model(inputs, outputs)
+
+# Create an empty array of priors.
+batch = 10
+priors = np.zeros(shape=(batch,) + (pixel_model.input_shape)[1:])
+batch, rows, cols = priors.shape
+
+# Iterate over the priors because generation has to be done sequentially pixel by pixel.
+for row in range(rows):
+    for col in range(cols):
+        # Feed the whole array and retrieve the sampled value for the next pixel.
+        probs = sampler.predict(priors)
+        # Write the sampled values back into the priors.
+        priors[:, row, col] = probs[:, row, col]
+
+print(f"Prior shape: {priors.shape}")
+
+# Perform an embedding lookup.
+pretrained_embeddings = quantizer.embeddings
+priors_ohe = tf.one_hot(priors.astype("int32"), model.embeddings_num).numpy()
+quantized = tf.matmul(
+    priors_ohe.astype("float32"), pretrained_embeddings, transpose_b=True
+)
+quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:])))
+
+# Generate novel images.
+decoder = model.vqvae_model.get_layer("decoder")
+generated_samples = decoder.predict(quantized)
+
+for i in range(batch):
+    plt.subplot(1, 2, 1)
+    plt.imshow(priors[i])
+    plt.title("Code")
+    plt.axis("off")
+
+    plt.subplot(1, 2, 2)
+    plt.imshow(generated_samples[i].squeeze() + 0.5)
+    plt.title("Generated Sample")
+    plt.axis("off")
+    plt.show()
+
+"""
\ No newline at end of file
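+# Editorial sketch (not part of the original submission): the SSIM check above only
+# samples 5 test images, and the module docstring asks for the trained model to be saved.
+# Both can be covered with standard TensorFlow/Keras calls; the batch size and the
+# weights file name below are illustrative assumptions.
+#
+# reconstructions = trained_model.predict(test_X, batch_size=32)
+# mean_ssim = tf.reduce_mean(tf.image.ssim(test_X, reconstructions, max_val=1.0))
+# print("Mean SSIM over the full test set:", float(mean_ssim))
+# trained_model.save_weights("vqvae_oasis_weights.h5")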