############################################################################################
#
# Project:       Peter Moss Acute Myeloid & Lymphoblastic Leukemia AI Research Project
# Repository:    ALL Detection System 2019
# Project:       NCS1 Classifier
#
# Author:        Adam Milton-Barker (AdamMiltonBarker.com)
# Contributors:
# Title:         Evaluation Class
# Description:   Evaluation class for the ALL Detection System 2019 NCS1 Classifier.
# License:       MIT License
# Last Modified: 2020-07-16
#
############################################################################################
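# Usage (a sketch; the Classifier settings referenced throughout are read from
# the configuration file loaded by the Helpers class):
#
#   $ python Evaluation.py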
import os
import time

import matplotlib
matplotlib.use("Agg")  # Select the non-interactive backend before pyplot is imported

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from tensorflow.contrib.framework.python.ops.variables import get_or_create_global_step
from tensorflow.python.platform import tf_logging as logging

import Classes.inception_preprocessing

from Classes.Helpers import Helpers
from Classes.inception_v3 import inception_v3, inception_v3_arg_scope

plt.style.use('ggplot')
slim = tf.contrib.slim

# Optional CPU threading and affinity tuning, kept for reference:
# config = tf.ConfigProto(intra_op_parallelism_threads=12, inter_op_parallelism_threads=2,
#                         allow_soft_placement=True, device_count={'CPU': 12})
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# os.environ["OMP_NUM_THREADS"] = "12"
# os.environ["KMP_BLOCKTIME"] = "30"
# os.environ["KMP_SETTINGS"] = "1"
# os.environ["KMP_AFFINITY"] = "granularity=fine,verbose,compact,1,0"
class Evaluation():
    """ Evaluation Class

    Evaluates the ALL Detection System 2019 NCS1 Classifier.
    """
    def __init__(self):
        """ Initializes the Evaluation Class """

        self.Helpers = Helpers("Evaluator")
        self.confs = self.Helpers.confs
        self.labelsToName = {}

        self.checkpoint_file = tf.train.latest_checkpoint(
            self.confs["Classifier"]["LogDir"])

        # Open the labels file
        self.labels = open(
            self.confs["Classifier"]["DatasetDir"] + "/" + self.confs["Classifier"]["Labels"], 'r')

        # Create a dictionary mapping each integer label to its string name
        for line in self.labels:
            label, string_name = line.split(':')
            string_name = string_name[:-1]  # Remove the trailing newline
            self.labelsToName[int(label)] = string_name
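
        # For reference, each line of the labels file is expected to look like
        # "0:SomeClass" (the class name here is illustrative; the real names
        # come from the dataset labels file named in the configuration).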
        # Human-readable descriptions of the dataset items. These are required
        # by the slim Dataset class later.
        self.items_to_descriptions = {
            'image': 'A 3-channel RGB image.',
            'label': 'An integer class label, starting from zero.'
        }

        self.Helpers.logger.info(
            "Evaluator class initialization complete.")
    # ============== DATASET LOADING ======================
    # getSplit creates a Dataset object that reads the TFRecord shards so the
    # examples can be fed into a queue in parallel.
    def getSplit(self, split_name):
        '''
        Obtains the training or validation split and wraps it in a Dataset
        object for feeding examples into a queue later on. The decoder and
        dataset information are set up in a single Dataset object so that the
        tedious work is avoided later. The file_pattern is very important for
        locating the TFRecord files.

        INPUTS:
        - split_name(str): 'train' or 'validation'. Selects the correct split of TFRecord files.

        OUTPUTS:
        - dataset(Dataset): A Dataset object whose components can be read for easier batch creation later.
        '''
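        # The shard files are located via the TFRecordPattern configuration
        # value; an illustrative shard name would be something like
        # ALL_validation_00000-of-00002.tfrecord (the exact names depend on
        # how the TFRecords were created).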
        # First check whether split_name is train or validation
        if split_name not in ['train', 'validation']:
            raise ValueError(
                'The split_name %s is not recognized. Please input either train or validation as the split_name' % (split_name))

        # Create the full file_pattern path used to locate the TFRecord files
        file_pattern_path = os.path.join(
            self.confs["Classifier"]["DatasetDir"], self.confs["Classifier"]["TFRecordPattern"] % (split_name))

        # Count the total number of examples across all of the shards
        num_samples = 0
        file_pattern_for_counting = 'ALL_' + split_name
        tfrecords_to_count = [os.path.join(self.confs["Classifier"]["DatasetDir"], file) for file in os.listdir(
            self.confs["Classifier"]["DatasetDir"]) if file.startswith(file_pattern_for_counting)]

        for tfrecord_file in tfrecords_to_count:
            for record in tf.python_io.tf_record_iterator(tfrecord_file):
                num_samples += 1
        # Create a reader, which must be a TFRecordReader in this case
        reader = tf.TFRecordReader

        # Create the keys_to_features dictionary for the decoder
        keys_to_features = {
            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
            'image/class/label': tf.FixedLenFeature(
                [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
        }
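
        # For reference, a compatible record would have been serialized roughly
        # as follows when the TFRecords were created (a sketch, not code from
        # this repository; jpeg_bytes, label and writer are placeholders):
        #
        #   example = tf.train.Example(features=tf.train.Features(feature={
        #       'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[jpeg_bytes])),
        #       'image/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=[b'jpg'])),
        #       'image/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
        #   }))
        #   writer.write(example.SerializeToString())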
        # Create the items_to_handlers dictionary for the decoder
        items_to_handlers = {
            'image': slim.tfexample_decoder.Image(),
            'label': slim.tfexample_decoder.Tensor('image/class/label'),
        }

        # Create the decoder
        decoder = slim.tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        # Create the labels_to_name mapping
        labels_to_name_dict = self.labelsToName

        # Actually create the dataset
        dataset = slim.dataset.Dataset(
            data_sources=file_pattern_path,
            decoder=decoder,
            reader=reader,
            num_readers=4,
            num_samples=num_samples,
            num_classes=self.confs["Classifier"]["NumClasses"],
            labels_to_name=labels_to_name_dict,
            items_to_descriptions=self.items_to_descriptions)

        return dataset
    def loadBatch(self, dataset, is_training=True):
        '''
        Loads a batch of images and labels for evaluation. The batch size and
        image dimensions are read from the configuration.

        INPUTS:
        - dataset(Dataset): a Dataset object created by the getSplit function
        - is_training(bool): determines whether training or evaluation preprocessing is performed

        OUTPUTS:
        - images(Tensor): a Tensor of shape (batch_size, height, width, channels) containing one batch of preprocessed images
        - raw_images(Tensor): the same batch of images, resized but otherwise unprocessed, for visualization
        - labels(Tensor): the batch's labels with shape (batch_size,) (requires one-hot encoding later)
        '''
        # First create the data_provider object
        data_provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=24 + 3 * self.confs["Classifier"]["BatchTestSize"],
            common_queue_min=24)

        # Obtain the raw image using the get method
        raw_image, label = data_provider.get(['image', 'label'])

        # Perform the correct preprocessing for this image, depending on
        # whether it is being used for training or evaluation
        image = Classes.inception_preprocessing.preprocess_image(
            raw_image, self.confs["Classifier"]["ImageSize"], self.confs["Classifier"]["ImageSize"], is_training)

        # As for the raw images, a simple resize is enough to batch them up
        raw_image = tf.image.resize_image_with_crop_or_pad(
            raw_image, self.confs["Classifier"]["ImageSize"], self.confs["Classifier"]["ImageSize"])

        # Batch up the images by enqueuing the tensors internally in a FIFO
        # queue and dequeuing many elements with tf.train.batch
        images, raw_images, labels = tf.train.batch(
            [image, raw_image, label],
            batch_size=self.confs["Classifier"]["BatchTestSize"],
            num_threads=4,
            capacity=4 * self.confs["Classifier"]["BatchTestSize"],
            allow_smaller_final_batch=True)

        return images, raw_images, labels
Evaluation = Evaluation()
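
# A minimal usage sketch for the two methods above (this mirrors what run()
# does below; 'validation' is the only split evaluated by this script):
#
#   dataset = Evaluation.getSplit('validation')
#   images, raw_images, labels = Evaluation.loadBatch(dataset, is_training=False)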
def run():

    # Create LogDirEval to hold the evaluation information
    if not os.path.exists(Evaluation.confs["Classifier"]["LogDirEval"]):
        os.mkdir(Evaluation.confs["Classifier"]["LogDirEval"])

    # Construct the graph from scratch again
    with tf.Graph().as_default() as graph:
        tf.logging.set_verbosity(tf.logging.INFO)

        # Get the dataset and load one batch of validation images and labels.
        # is_training is set to False so that evaluation preprocessing is used.
        dataset = Evaluation.getSplit('validation')
        images, raw_images, labels = Evaluation.loadBatch(
            dataset, is_training=False)

        # Work out how many steps make up an epoch; np.ceil keeps the partial
        # final batch, and the integer result keeps the modulo checks below exact
        num_batches_per_epoch = int(np.ceil(
            dataset.num_samples / float(Evaluation.confs["Classifier"]["BatchTestSize"])))
        num_steps_per_epoch = num_batches_per_epoch
        # Now create the inference model, but set is_training=False
        with slim.arg_scope(inception_v3_arg_scope()):
            logits, end_points = inception_v3(
                images, num_classes=dataset.num_classes, is_training=False)

        # Perform one-hot encoding of the labels
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        # Equivalent to tf.nn.softmax_cross_entropy_with_logits, but enhanced with checks
        loss = tf.losses.softmax_cross_entropy(
            onehot_labels=one_hot_labels, logits=logits)

        # Obtain the regularization losses as well
        total_loss = tf.losses.get_total_loss()

        # Get all the variables to restore from the checkpoint file and create
        # the saver used to restore them
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        def restore_fn(sess):
            return saver.restore(sess, Evaluation.checkpoint_file)
        # Define the metrics to track; the loss is already handled above
        probabilities = end_points['Predictions']
        predictions = tf.argmax(probabilities, 1)
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update)
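
        # Note that streaming_accuracy accumulates a running total/count in
        # local variables, so each reported value is the accuracy over all
        # batches evaluated so far, not just the current batch.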
        # Create the global step and an increment op for monitoring; there is
        # no optimizer apply_gradients call here, so the global step has to be
        # increased manually
        global_step = get_or_create_global_step()
        global_step_op = tf.assign(global_step, global_step + 1)
        # Create an evaluation step function
        def eval_step(sess, metrics_op, global_step):
            '''
            Runs the metrics op in the given session and logs the streaming
            accuracy together with the time taken for the step.
            '''

            start_time = time.time()
            _, global_step_count, accuracy_value = sess.run(
                [metrics_op, global_step_op, accuracy])
            time_elapsed = time.time() - start_time

            # Log some information
            logging.info('Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
                         global_step_count, accuracy_value, time_elapsed)

            return accuracy_value
        # Define some scalar quantities to monitor
        tf.summary.scalar("Validation_Accuracy", accuracy)
        tf.summary.scalar("Validation_losses/Total_Loss", total_loss)
        my_summary_op = tf.summary.merge_all()

        # Get the supervisor
        sv = tf.train.Supervisor(
            logdir=Evaluation.confs["Classifier"]["LogDirEval"], summary_op=None, init_fn=restore_fn)
        # Now everything is ready to run in one session
        with sv.managed_session() as sess:
            for step in range(int(num_batches_per_epoch * Evaluation.confs["Classifier"]["EpochsTest"])):
                # Print vital information at the start of each epoch
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch: %s/%s', step // num_batches_per_epoch + 1,
                                 Evaluation.confs["Classifier"]["EpochsTest"])
                    logging.info('Current Streaming Accuracy: %.4f',
                                 sess.run(accuracy))

                # Compute summaries every 10 steps and continue evaluating
                if step % 10 == 0:
                    eval_step(sess, metrics_op=metrics_op,
                              global_step=sv.global_step)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

                # Otherwise just run as per normal
                else:
                    eval_step(sess, metrics_op=metrics_op,
                              global_step=sv.global_step)

            # At the end of the evaluation, show the final accuracy
            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))
            # Visualize the last batch's images to see what the model predicted
            raw_images, labels, predictions, probabilities = sess.run(
                [raw_images, labels, predictions, probabilities])

            # The final batch may be smaller than 10 images, as
            # allow_smaller_final_batch is enabled in loadBatch
            for i in range(min(10, len(raw_images))):
                image, label, prediction, probability = raw_images[
                    i], labels[i], predictions[i], probabilities[i]
                prediction_name, label_name = dataset.labels_to_name[
                    prediction], dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s \n Probability: %s' % (
                    prediction_name, label_name, probability[prediction])
                img_plot = plt.imshow(image)

                # Set up the plot and hide the axes; showing the plot is left
                # disabled, as the Agg backend is non-interactive
                # plt.title(text)
                # img_plot.axes.get_yaxis().set_ticks([])
                # img_plot.axes.get_xaxis().set_ticks([])
                # plt.show()

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

if __name__ == '__main__':
    run()